diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log
new file mode 100644
index 0000000000000000000000000000000000000000..5bbd6da594f649c913c1d078600d4efde7a0592d
--- /dev/null
+++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log
@@ -0,0 +1,4111 @@
+# Running on gpub001.delta.ncsa.illinois.edu
+# Started at Fri Jul 14 13:29:16 CDT 2023
+# SLURMD_NODENAME=gpub001
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2157595
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x16)'
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2157595
+# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpub[001-002,006,008,022,024,026-027,048-051,074,077-079]'
+# SLURM_JOB_NUM_NODES=16
+# SLURM_JOB_PARTITION=gpuA40x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_NNODES=16
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpub[001-002,006,008,022,024,026-027,048-051,074,077-079]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1
+# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu
+# SLURM_TASKS_PER_NODE='1(x16)'
+# SLURM_TASK_PID=1052675
+# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub001
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_82d48caa-19ea-4797-8a82-8af4fa04f369
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_82d48caa-19ea-4797-8a82-8af4fa04f369
+[gpub001:0/64] 2023-07-14 13:30:20,482 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub001:0/64] 2023-07-14 13:30:21,930 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub001:0/64] 2023-07-14 13:30:21,964 (s2t:483) INFO: Vocabulary size: 50002 +[gpub001:0/64] 2023-07-14 13:30:35,251 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12-23): 12 x EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
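The encoder stack printed above is one pre-LN transformer block repeated 24 times: multi-head self-attention and a position-wise feed-forward, each preceded by LayerNorm and wrapped in a residual connection with dropout (the trailing after_norm is the usual companion of normalize_before=True). A minimal PyTorch sketch of one such block matching the printed shapes (d_model=1024, d_ff=4096, p=0.1, eps=1e-12); the head count is not shown in this dump, so n_head=16 is an assumption:

    import torch.nn as nn

    class EncoderLayerSketch(nn.Module):
        # Pre-LN block: x + Drop(SelfAttn(LN(x))), then x + Drop(FFN(LN(x)))
        def __init__(self, d_model=1024, n_head=16, d_ff=4096, p=0.1):
            super().__init__()
            self.self_attn = nn.MultiheadAttention(d_model, n_head, dropout=p,
                                                   batch_first=True)
            self.feed_forward = nn.Sequential(   # (w_1) -> ReLU -> dropout -> (w_2)
                nn.Linear(d_model, d_ff), nn.ReLU(), nn.Dropout(p),
                nn.Linear(d_ff, d_model))
            self.norm1 = nn.LayerNorm(d_model, eps=1e-12)
            self.norm2 = nn.LayerNorm(d_model, eps=1e-12)
            self.dropout = nn.Dropout(p)

        def forward(self, x, pad_mask=None):
            y = self.norm1(x)
            x = x + self.dropout(self.self_attn(y, y, y, key_padding_mask=pad_mask,
                                                need_weights=False)[0])
            return x + self.dropout(self.feed_forward(self.norm2(x)))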
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0-23): 24 x DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
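For reference, the 888.51 M figure in the summary that follows can be reproduced from the printed shapes. A quick count in Python (the conv2d frontend is not shown in this portion of the dump; attributing the leftover ~29 M to a 1024-channel Conv2dSubsampling module is an inference, not read from the log):

    d, ff, vocab = 1024, 4096, 50002
    lin = lambda i, o: i * o + o                  # weight + bias
    attn = 4 * lin(d, d)                          # linear_q/k/v/out
    ffn = lin(d, ff) + lin(ff, d)                 # w_1 + w_2
    ln = 2 * d                                    # LayerNorm gain + bias

    enc = attn + ffn + 2 * ln                     # self_attn, feed_forward, norm1-2
    dec = 2 * attn + ffn + 3 * ln                 # + src_attn and norm3
    total = (24 * enc + 24 * dec                  # both stacks
             + vocab * d                          # decoder Embedding(50002, 1024)
             + 2 * lin(d, vocab)                  # output_layer + ctc_lo
             + 2 * ln)                            # the two after_norm layers
    print(round(total / 1e6, 2))                  # 859.14; +29.37 M frontend = 888.51 M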
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub001:0/64] 2023-07-14 13:30:35,277 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub001:0/64] 2023-07-14 13:30:35,962 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub001:0/64] 2023-07-14 13:30:44,311 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 13:30:44,462 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-14 13:30:44,462 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/64] 2023-07-14 13:30:44,463 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub001:0/64] 2023-07-14 13:30:44,955 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
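The seemingly tiny lr of 2.5e-08 in Parameter Group 0 above is just WarmupLR(warmup_steps=10000) evaluated at the first step: the schedule scales the base rate as lr(step) = base * warmup^0.5 * min(step^-0.5, step * warmup^-1.5), i.e. a linear ramp to the configured 2.5e-4 at step 10000 followed by inverse-square-root decay. A sketch of that formula (not the scheduler class itself):

    def warmup_lr(step: int, base: float = 2.5e-4, warmup: int = 10_000) -> float:
        # Noam-style schedule: linear warm-up, then step**-0.5 decay
        return base * warmup**0.5 * min(step**-0.5, step * warmup**-1.5)

    print(warmup_lr(1))        # 2.5e-08  (the value logged above)
    print(warmup_lr(10_000))   # 0.00025  (peak = initial_lr)
    print(warmup_lr(40_000))   # 0.000125 (halved after 4x the warm-up)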
+[gpub001:0/64] 2023-07-14 13:31:11,236 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub001:1052798:1052798 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:1052798:1052798 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:1052798:1052798 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub001:0/64] 2023-07-14 13:31:16,544 (trainer:284) INFO: 49/60epoch started
+[gpub001:0/64] 2023-07-14 13:31:16,605 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-14 13:31:34,016 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 13:31:37,332 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-14 13:31:37,332 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-14 13:31:37,338 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
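The batch-sampler lines follow directly from the shape files: UnsortedBatchSampler chunks the key list without sorting, so N-batch is len(keys) // batch_size and the remainder is folded into existing batches rather than emitted as a short batch, hence min=128, max=129. For the [valid] set, the plot_att sampler shows 129,591 keys, which at batch_size=128 gives the logged N-batch=1012 and mean 128.1. An illustrative re-derivation of that arithmetic (not ESPnet's actual class):

    def unsorted_batches(keys, batch_size):
        n_batch = len(keys) // batch_size          # 129591 // 128 = 1012
        base, extra = divmod(len(keys), n_batch)   # base=128, extra=55 oversized batches
        out, i = [], 0
        for b in range(n_batch):
            size = base + (b < extra)              # 55 batches of 129, 957 of 128
            out.append(keys[i:i + size])
            i += size
        return out

    batches = unsorted_batches([f"utt{i}" for i in range(129_591)], 128)
    print(len(batches), min(map(len, batches)), max(map(len, batches)))  # 1012 128 129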
+gpub050:2739708:2739708 [3] NCCL INFO cudaDriverVersion 12010
+gpub050:2739708:2739708 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:2739708:2739708 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:2739708:2739778 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:2739708:2739778 [3] NCCL INFO Using network IB
+gpub050:2739708:2739778 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub050:2739708:2739778 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpub050:2739708:2739778 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub050:2739708:2739778 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub050:2739708:2739778 [3] NCCL INFO Connected all rings
+gpub050:2739708:2739778 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub050:2739708:2739778 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub050:2739708:2739778 [3] NCCL INFO Connected all trees
+gpub050:2739708:2739778 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:2739708:2739778 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:2739708:2739778 [3] NCCL INFO comm 0x51443e00 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+[... analogous NCCL INFO init blocks, differing only in rank IDs and ring/tree links, for ranks 4-7 (gpub002), 28-29 and 31 (gpub027), 40-42 (gpub050), 44-47 (gpub051), and 56-59 (gpub078); each block ends with "... - Init COMPLETE" ...]
eth1:172.28.23.127<0> +gpub027:4034779:4034862 [0] NCCL INFO Using network IB +gpub027:4034779:4034862 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub027:4034779:4034862 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub027:4034779:4034862 [0] NCCL INFO Connected all rings +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Connected all trees +gpub027:4034779:4034862 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034779:4034862 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034779:4034862 [0] NCCL INFO comm 0xb5e6b1f0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub027:4034782:4034782 [3] NCCL INFO cudaDriverVersion 12010 +gpub027:4034782:4034782 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034782:4034782 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034782:4034861 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034782:4034861 [3] NCCL INFO Using network IB +gpub027:4034782:4034861 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub027:4034782:4034861 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub027:4034782:4034861 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub027:4034782:4034861 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub027:4034782:4034861 [3] NCCL INFO Connected all rings +gpub027:4034782:4034861 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub027:4034782:4034861 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub027:4034782:4034861 [3] NCCL INFO Connected all trees +gpub027:4034782:4034861 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034782:4034861 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034782:4034861 [3] NCCL INFO comm 0x4f996350 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub027:4034781:4034781 [2] NCCL INFO cudaDriverVersion 12010 +gpub027:4034781:4034781 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034781:4034781 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034781:4034864 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034781:4034864 [2] NCCL INFO Using network IB +gpub027:4034781:4034864 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub027:4034781:4034864 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 
31/-1/-1->30->29 +gpub027:4034781:4034864 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Connected all rings +gpub027:4034781:4034864 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Connected all trees +gpub027:4034781:4034864 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034781:4034864 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034781:4034864 [2] NCCL INFO comm 0x8d940630 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub077:2521721:2521721 [3] NCCL INFO cudaDriverVersion 12010 +gpub077:2521721:2521721 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521721:2521721 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521721:2521790 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521721:2521790 [3] NCCL INFO Using network IB +gpub077:2521721:2521790 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub077:2521721:2521790 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub077:2521721:2521790 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521721:2521790 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521721:2521790 [3] NCCL INFO Connected all rings +gpub077:2521721:2521790 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:2521721:2521790 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:2521721:2521790 [3] NCCL INFO Connected all trees +gpub077:2521721:2521790 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521721:2521790 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521721:2521790 [3] NCCL INFO comm 0x500bb780 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub079:3396495:3396495 [2] NCCL INFO cudaDriverVersion 12010 +gpub079:3396495:3396495 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396495:3396495 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396495:3396576 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396495:3396576 [2] NCCL INFO Using network IB +gpub079:3396495:3396576 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub079:3396495:3396576 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub079:3396495:3396576 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Connected all rings +gpub079:3396495:3396576 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Connected all trees +gpub079:3396495:3396576 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396495:3396576 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396495:3396576 [2] NCCL INFO comm 0x8e939be0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub006:1859755:1859755 [3] NCCL INFO cudaDriverVersion 12010 
+gpub006:1859755:1859755 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859755:1859755 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859755:1859833 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859755:1859833 [3] NCCL INFO Using network IB +gpub006:1859755:1859833 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub006:1859755:1859833 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub006:1859755:1859833 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859755:1859833 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859755:1859833 [3] NCCL INFO Connected all rings +gpub006:1859755:1859833 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub006:1859755:1859833 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub006:1859755:1859833 [3] NCCL INFO Connected all trees +gpub006:1859755:1859833 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859755:1859833 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859755:1859833 [3] NCCL INFO comm 0x50847890 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub049:277663:277663 [1] NCCL INFO cudaDriverVersion 12010 +gpub049:277663:277663 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277663:277663 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277663:277744 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277663:277744 [1] NCCL INFO Using network IB +gpub049:277663:277744 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub049:277663:277744 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub049:277663:277744 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Connected all rings +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub049:277663:277744 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Connected all trees +gpub049:277663:277744 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277663:277744 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277663:277744 [1] NCCL INFO comm 0xb77e62d0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396496:3396496 [3] NCCL INFO cudaDriverVersion 12010 +gpub079:3396496:3396496 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396496:3396496 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396496:3396574 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396496:3396574 [3] NCCL INFO Using network IB +gpub079:3396496:3396574 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub079:3396496:3396574 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub079:3396496:3396574 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub079:3396496:3396574 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] 
[send] via NET/IB/0 +gpub079:3396496:3396574 [3] NCCL INFO Connected all rings +gpub079:3396496:3396574 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub079:3396496:3396574 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub079:3396496:3396574 [3] NCCL INFO Connected all trees +gpub079:3396496:3396574 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396496:3396574 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396496:3396574 [3] NCCL INFO comm 0x51317510 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:4055358:4055358 [2] NCCL INFO cudaDriverVersion 12010 +gpub074:4055358:4055358 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055358:4055358 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055358:4055429 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055358:4055429 [2] NCCL INFO Using network IB +gpub074:4055358:4055429 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub074:4055358:4055429 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub074:4055358:4055429 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Connected all rings +gpub074:4055358:4055429 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub049:277665:277665 [3] NCCL INFO cudaDriverVersion 12010 +gpub049:277665:277665 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277665:277665 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277665:277743 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277665:277743 [3] NCCL INFO Using network IB +gpub049:277665:277743 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub049:277665:277743 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub049:277665:277743 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub049:277665:277743 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub049:277665:277743 [3] NCCL INFO Connected all rings +gpub049:277665:277743 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub049:277665:277743 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Connected all trees +gpub074:4055358:4055429 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055358:4055429 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055358:4055429 [2] NCCL INFO comm 0x5076e6a0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:277665:277743 [3] NCCL INFO Connected all trees +gpub049:277665:277743 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277665:277743 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277665:277743 [3] NCCL INFO comm 0x9d1a0c70 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub049:277662:277662 [0] NCCL INFO cudaDriverVersion 12010 +gpub049:277662:277662 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277662:277662 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation 
+gpub049:277662:277745 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277662:277745 [0] NCCL INFO Using network IB +gpub049:277662:277745 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub049:277662:277745 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub049:277662:277745 [0] NCCL INFO Connected all rings +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Connected all trees +gpub049:277662:277745 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277662:277745 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277662:277745 [0] NCCL INFO comm 0x50033560 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub074:4055359:4055359 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:4055359:4055359 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055359:4055359 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055359:4055428 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055359:4055428 [3] NCCL INFO Using network IB +gpub074:4055359:4055428 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub074:4055359:4055428 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub074:4055359:4055428 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055359:4055428 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055359:4055428 [3] NCCL INFO Connected all rings +gpub074:4055359:4055428 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:4055359:4055428 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:4055359:4055428 [3] NCCL INFO Connected all trees +gpub074:4055359:4055428 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055359:4055428 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055359:4055428 [3] NCCL INFO comm 0xb59ce3d0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:2521719:2521719 [1] NCCL INFO cudaDriverVersion 12010 +gpub077:2521719:2521719 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521719:2521719 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521719:2521792 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521719:2521792 [1] NCCL INFO Using network IB +gpub077:2521719:2521792 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub077:2521719:2521792 [1] NCCL 
INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub077:2521719:2521792 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Connected all rings +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub077:2521719:2521792 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Connected all trees +gpub077:2521719:2521792 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521719:2521792 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521719:2521792 [1] NCCL INFO comm 0xb802530 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396493:3396493 [0] NCCL INFO cudaDriverVersion 12010 +gpub079:3396493:3396493 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396493:3396493 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396493:3396573 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396493:3396573 [0] NCCL INFO Using network IB +gpub079:3396493:3396573 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub079:3396493:3396573 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub079:3396493:3396573 [0] NCCL INFO Connected all rings +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Connected all trees +gpub079:3396493:3396573 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396493:3396573 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396493:3396573 [0] NCCL INFO comm 0x4f9d83d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub026:2781297:2781297 [1] NCCL INFO cudaDriverVersion 12010 +gpub026:2781297:2781297 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781297:2781297 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781297:2781382 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781297:2781382 [1] NCCL INFO Using network IB +gpub026:2781297:2781382 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub026:2781297:2781382 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC 
+gpub026:2781297:2781382 [1] NCCL INFO Connected all rings +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Connected all trees +gpub026:2781297:2781382 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781297:2781382 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781297:2781382 [1] NCCL INFO comm 0x50ca5540 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub006:1859754:1859754 [2] NCCL INFO cudaDriverVersion 12010 +gpub006:1859754:1859754 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859754:1859754 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859754:1859834 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859754:1859834 [2] NCCL INFO Using network IB +gpub006:1859754:1859834 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub006:1859754:1859834 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub006:1859754:1859834 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Connected all rings +gpub006:1859754:1859834 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Connected all trees +gpub006:1859754:1859834 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859754:1859834 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859754:1859834 [2] NCCL INFO comm 0xa42aef0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:4055357:4055357 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:4055357:4055357 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055357:4055357 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055357:4055427 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055357:4055427 [1] NCCL INFO Using network IB +gpub074:4055357:4055427 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:4055357:4055427 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Connected all rings +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Connected all trees +gpub074:4055357:4055427 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055357:4055427 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels 
per peer +gpub074:4055357:4055427 [1] NCCL INFO comm 0x8b30550 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396494:3396494 [1] NCCL INFO cudaDriverVersion 12010 +gpub079:3396494:3396494 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396494:3396494 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396494:3396575 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396494:3396575 [1] NCCL INFO Using network IB +gpub079:3396494:3396575 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub079:3396494:3396575 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub079:3396494:3396575 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Connected all rings +gpub079:3396494:3396575 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Connected all trees +gpub079:3396494:3396575 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396494:3396575 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396494:3396575 [1] NCCL INFO comm 0xc163a90 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub026:2781299:2781299 [3] NCCL INFO cudaDriverVersion 12010 +gpub026:2781299:2781299 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781299:2781299 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781299:2781379 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781299:2781379 [3] NCCL INFO Using network IB +gpub026:2781299:2781379 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub026:2781299:2781379 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub026:2781299:2781379 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781299:2781379 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781299:2781379 [3] NCCL INFO Connected all rings +gpub026:2781299:2781379 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub026:2781299:2781379 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub026:2781299:2781379 [3] NCCL INFO Connected all trees +gpub026:2781299:2781379 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781299:2781379 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781299:2781379 [3] NCCL INFO comm 0x507c61a0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:2521720:2521720 [2] NCCL INFO cudaDriverVersion 12010 +gpub077:2521720:2521720 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521720:2521720 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521720:2521791 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521720:2521791 [2] NCCL INFO Using network IB +gpub077:2521720:2521791 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub077:2521720:2521791 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub077:2521720:2521791 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Channel 01/0 : 
54[85000] -> 55[c7000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Connected all rings +gpub077:2521720:2521791 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Connected all trees +gpub077:2521720:2521791 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521720:2521791 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521720:2521791 [2] NCCL INFO comm 0xa4c559d0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub006:1859752:1859752 [0] NCCL INFO cudaDriverVersion 12010 +gpub006:1859752:1859752 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859752:1859752 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859752:1859836 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859752:1859836 [0] NCCL INFO Using network IB +gpub006:1859752:1859836 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub006:1859752:1859836 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub006:1859752:1859836 [0] NCCL INFO Connected all rings +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Connected all trees +gpub006:1859752:1859836 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859752:1859836 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859752:1859836 [0] NCCL INFO comm 0x50278a40 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2990364:2990364 [0] NCCL INFO cudaDriverVersion 12010 +gpub008:2990364:2990364 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990364:2990364 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990364:2990440 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990364:2990440 [0] NCCL INFO Using network IB +gpub008:2990364:2990440 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub008:2990364:2990440 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub008:2990364:2990440 [0] 
NCCL INFO Connected all rings +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Connected all trees +gpub008:2990364:2990440 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990364:2990440 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990364:2990440 [0] NCCL INFO comm 0xa229210 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub077:2521718:2521718 [0] NCCL INFO cudaDriverVersion 12010 +gpub077:2521718:2521718 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521718:2521718 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521718:2521789 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521718:2521789 [0] NCCL INFO Using network IB +gpub077:2521718:2521789 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub077:2521718:2521789 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub077:2521718:2521789 [0] NCCL INFO Connected all rings +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Connected all trees +gpub077:2521718:2521789 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521718:2521789 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521718:2521789 [0] NCCL INFO comm 0x5162ad90 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2990366:2990366 [2] NCCL INFO cudaDriverVersion 12010 +gpub008:2990366:2990366 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990366:2990366 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990366:2990443 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990366:2990443 [2] NCCL INFO Using network IB +gpub008:2990366:2990443 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub008:2990366:2990443 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub008:2990366:2990443 [2] NCCL INFO Channel 00/0 : 
14[85000] -> 15[c7000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Connected all rings +gpub008:2990366:2990443 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Connected all trees +gpub008:2990366:2990443 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990366:2990443 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990366:2990443 [2] NCCL INFO comm 0xb6d2c880 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:4055356:4055356 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:4055356:4055356 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055356:4055356 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055356:4055430 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055356:4055430 [0] NCCL INFO Using network IB +gpub074:4055356:4055430 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:4055356:4055430 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:4055356:4055430 [0] NCCL INFO Connected all rings +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Connected all trees +gpub074:4055356:4055430 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055356:4055430 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055356:4055430 [0] NCCL INFO comm 0x9c0ae50 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub006:1859753:1859753 [1] NCCL INFO cudaDriverVersion 12010 +gpub006:1859753:1859753 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859753:1859753 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859753:1859835 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859753:1859835 [1] NCCL INFO Using network IB +gpub006:1859753:1859835 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub006:1859753:1859835 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Connected all rings +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 4[7000] -> 
9[46000] [receive] via NET/IB/0 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Connected all trees +gpub006:1859753:1859835 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859753:1859835 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859753:1859835 [1] NCCL INFO comm 0xa3eb5f0 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:1052799:1052799 [1] NCCL INFO cudaDriverVersion 12010 +gpub001:1052799:1052799 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052799:1052799 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052799:1052880 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052799:1052880 [1] NCCL INFO Using network IB +gpub001:1052799:1052880 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub001:1052799:1052880 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub001:1052799:1052880 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Connected all rings +gpub001:1052799:1052880 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Connected all trees +gpub001:1052799:1052880 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052799:1052880 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052799:1052880 [1] NCCL INFO comm 0x50befe70 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:1052801:1052801 [3] NCCL INFO cudaDriverVersion 12010 +gpub001:1052801:1052801 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052801:1052801 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052801:1052879 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052801:1052879 [3] NCCL INFO Using network IB +gpub001:1052801:1052879 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub001:1052801:1052879 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub001:1052801:1052879 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052801:1052879 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052801:1052879 [3] NCCL INFO Connected all rings +gpub001:1052801:1052879 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub001:1052801:1052879 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub001:1052801:1052879 [3] NCCL INFO Connected all trees +gpub001:1052801:1052879 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052801:1052879 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052801:1052879 [3] NCCL INFO comm 0xb78dc020 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:2781298:2781298 [2] NCCL INFO cudaDriverVersion 12010 +gpub026:2781298:2781298 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781298:2781298 [2] NCCL INFO NET/Plugin : No plugin found 
(libnccl-net.so), using internal implementation +gpub026:2781298:2781380 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781298:2781380 [2] NCCL INFO Using network IB +gpub026:2781298:2781380 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub026:2781298:2781380 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub026:2781298:2781380 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Connected all rings +gpub026:2781298:2781380 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub049:277664:277664 [2] NCCL INFO cudaDriverVersion 12010 +gpub049:277664:277664 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277664:277664 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277664:277742 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277664:277742 [2] NCCL INFO Using network IB +gpub049:277664:277742 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub049:277664:277742 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub049:277664:277742 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Connected all rings +gpub049:277664:277742 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Connected all trees +gpub026:2781298:2781380 [2] NCCL INFO Connected all trees +gpub026:2781298:2781380 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781298:2781380 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781298:2781380 [2] NCCL INFO comm 0x8e36d550 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:277664:277742 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277664:277742 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277664:277742 [2] NCCL INFO comm 0x92096c0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub008:2990367:2990367 [3] NCCL INFO cudaDriverVersion 12010 +gpub008:2990367:2990367 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990367:2990367 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990367:2990442 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990367:2990442 [3] NCCL INFO Using network IB +gpub008:2990367:2990442 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub008:2990367:2990442 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub008:2990367:2990442 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub008:2990367:2990442 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub008:2990367:2990442 [3] NCCL INFO Connected all rings +gpub008:2990367:2990442 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub008:2990367:2990442 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub008:2990367:2990442 [3] NCCL INFO Connected all trees +gpub008:2990367:2990442 [3] NCCL 
INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990367:2990442 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990367:2990442 [3] NCCL INFO comm 0x4fa470f0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub001:1052798:1052882 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052798:1052882 [0] NCCL INFO Using network IB +gpub001:1052798:1052882 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:1052798:1052882 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:1052798:1052882 [0] NCCL INFO Connected all rings +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Connected all trees +gpub001:1052798:1052882 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052798:1052882 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052798:1052882 [0] NCCL INFO comm 0x50dde690 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106667:106667 [1] NCCL INFO cudaDriverVersion 12010 +gpub022:106667:106667 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106667:106667 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106667:106747 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106667:106747 [1] NCCL INFO Using network IB +gpub022:106667:106747 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub022:106667:106747 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Connected all rings +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Connected all trees +gpub022:106667:106747 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106667:106747 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106667:106747 [1] NCCL INFO comm 0x8f3e3330 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2990365:2990365 [1] NCCL INFO cudaDriverVersion 12010 +gpub008:2990365:2990365 [1] 
NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990365:2990365 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990365:2990441 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990365:2990441 [1] NCCL INFO Using network IB +gpub008:2990365:2990441 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub008:2990365:2990441 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub008:2990365:2990441 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Connected all rings +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub008:2990365:2990441 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Connected all trees +gpub008:2990365:2990441 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990365:2990441 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990365:2990441 [1] NCCL INFO comm 0x98eccf0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:541987:541987 [2] NCCL INFO cudaDriverVersion 12010 +gpub024:541987:541987 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541987:541987 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541987:542066 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541987:542066 [2] NCCL INFO Using network IB +gpub024:541987:542066 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub024:541987:542066 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub024:541987:542066 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Connected all rings +gpub024:541987:542066 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Connected all trees +gpub024:541987:542066 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541987:542066 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541987:542066 [2] NCCL INFO comm 0x505ff970 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub001:1052800:1052800 [2] NCCL INFO cudaDriverVersion 12010 +gpub001:1052800:1052800 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052800:1052800 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052800:1052881 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052800:1052881 [2] NCCL INFO Using network IB +gpub001:1052800:1052881 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub001:1052800:1052881 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub001:1052800:1052881 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL 
INFO Connected all rings +gpub001:1052800:1052881 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Connected all trees +gpub001:1052800:1052881 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052800:1052881 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052800:1052881 [2] NCCL INFO comm 0x8e66c510 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub022:106669:106669 [3] NCCL INFO cudaDriverVersion 12010 +gpub022:106669:106669 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106669:106669 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106669:106748 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106669:106748 [3] NCCL INFO Using network IB +gpub022:106669:106748 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub022:106669:106748 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub022:106669:106748 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106669:106748 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106669:106748 [3] NCCL INFO Connected all rings +gpub022:106669:106748 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub022:106669:106748 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub022:106669:106748 [3] NCCL INFO Connected all trees +gpub022:106669:106748 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106669:106748 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106669:106748 [3] NCCL INFO comm 0x4f1d7190 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:2781296:2781296 [0] NCCL INFO cudaDriverVersion 12010 +gpub026:2781296:2781296 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781296:2781296 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781296:2781381 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781296:2781381 [0] NCCL INFO Using network IB +gpub026:2781296:2781381 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub026:2781296:2781381 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub026:2781296:2781381 [0] NCCL INFO Connected all rings +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Connected all trees 
+gpub026:2781296:2781381 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781296:2781381 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781296:2781381 [0] NCCL INFO comm 0xaebd9cd0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub024:541985:541985 [0] NCCL INFO cudaDriverVersion 12010 +gpub024:541985:541985 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541985:541985 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541985:542068 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541985:542068 [0] NCCL INFO Using network IB +gpub024:541985:542068 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub024:541985:542068 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub024:541985:542068 [0] NCCL INFO Connected all rings +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Connected all trees +gpub024:541985:542068 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541985:542068 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541985:542068 [0] NCCL INFO comm 0x4ffe64c0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106668:106668 [2] NCCL INFO cudaDriverVersion 12010 +gpub022:106668:106668 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106668:106668 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106668:106749 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106668:106749 [2] NCCL INFO Using network IB +gpub022:106668:106749 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub022:106668:106749 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub022:106668:106749 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Connected all rings +gpub022:106668:106749 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Connected all trees +gpub022:106668:106749 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106668:106749 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106668:106749 [2] NCCL INFO comm 0x4fd27690 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub024:541988:541988 [3] NCCL 
INFO cudaDriverVersion 12010 +gpub024:541988:541988 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541988:541988 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541988:542067 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541988:542067 [3] NCCL INFO Using network IB +gpub024:541988:542067 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub024:541988:542067 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub024:541988:542067 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub024:541988:542067 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub024:541988:542067 [3] NCCL INFO Connected all rings +gpub024:541988:542067 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub024:541988:542067 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub024:541988:542067 [3] NCCL INFO Connected all trees +gpub024:541988:542067 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541988:542067 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541988:542067 [3] NCCL INFO comm 0xb65fe8d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub048:3933786:3933786 [3] NCCL INFO cudaDriverVersion 12010 +gpub048:3933786:3933786 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933786:3933786 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933786:3933849 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933786:3933849 [3] NCCL INFO Using network IB +gpub048:3933786:3933849 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub048:3933786:3933849 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub048:3933786:3933849 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933786:3933849 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933786:3933849 [3] NCCL INFO Connected all rings +gpub048:3933786:3933849 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub048:3933786:3933849 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub048:3933786:3933849 [3] NCCL INFO Connected all trees +gpub048:3933786:3933849 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933786:3933849 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933786:3933849 [3] NCCL INFO comm 0x8e08e110 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub048:3933785:3933785 [2] NCCL INFO cudaDriverVersion 12010 +gpub048:3933785:3933785 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933785:3933785 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933785:3933846 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933785:3933846 [2] NCCL INFO Using network IB +gpub048:3933785:3933846 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub048:3933785:3933846 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub048:3933785:3933846 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Connected all rings +gpub048:3933785:3933846 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via 
P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Connected all trees +gpub048:3933785:3933846 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933785:3933846 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933785:3933846 [2] NCCL INFO comm 0xb9dce190 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub048:3933784:3933784 [1] NCCL INFO cudaDriverVersion 12010 +gpub048:3933784:3933784 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933784:3933784 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933784:3933848 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933784:3933848 [1] NCCL INFO Using network IB +gpub048:3933784:3933848 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub048:3933784:3933848 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Connected all rings +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Connected all trees +gpub048:3933784:3933848 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933784:3933848 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933784:3933848 [1] NCCL INFO comm 0x9d3ee1d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:541986:541986 [1] NCCL INFO cudaDriverVersion 12010 +gpub024:541986:541986 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541986:541986 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541986:542065 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541986:542065 [1] NCCL INFO Using network IB +gpub024:541986:542065 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub024:541986:542065 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub024:541986:542065 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Connected all rings +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub024:541986:542065 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Connected all trees +gpub024:541986:542065 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541986:542065 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541986:542065 [1] NCCL INFO comm 0x8c61ca80 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub048:3933783:3933783 [0] NCCL INFO 
cudaDriverVersion 12010 +gpub048:3933783:3933783 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933783:3933783 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933783:3933847 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933783:3933847 [0] NCCL INFO Using network IB +gpub048:3933783:3933847 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub048:3933783:3933847 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub048:3933783:3933847 [0] NCCL INFO Connected all rings +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Connected all trees +gpub048:3933783:3933847 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933783:3933847 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933783:3933847 [0] NCCL INFO comm 0x8d070d10 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106666:106666 [0] NCCL INFO cudaDriverVersion 12010 +gpub022:106666:106666 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106666:106666 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106666:106746 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106666:106746 [0] NCCL INFO Using network IB +gpub022:106666:106746 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub022:106666:106746 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub022:106666:106746 [0] NCCL INFO Connected all rings +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Connected all trees 
+gpub022:106666:106746 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106666:106746 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106666:106746 [0] NCCL INFO comm 0x4ef16f50 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
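The block above is NCCL's per-rank initialization report (bootstrap interface, IB/RoCE transport, CPU affinity, ring/tree topology, channel wiring), printed because NCCL debug logging is enabled for this job. A minimal sketch of turning on the same output from Python, assuming the standard NCCL_DEBUG environment variables (the actual launch script is not shown in this log):

    import os

    # NCCL reads these when the communicator is created, so they must be set
    # before torch.distributed.init_process_group() (or exported by the launcher).
    os.environ["NCCL_DEBUG"] = "INFO"          # emit INIT/topology lines like those above
    os.environ["NCCL_DEBUG_SUBSYS"] = "INIT"   # optional: restrict output to one subsystem

    import torch.distributed as dist

    # Rank, world size, and rendezvous address come from the launcher (srun in this job).
    dist.init_process_group(backend="nccl")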
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
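Each DDP process prints this reducer warning at the first training step: the trainer constructed DistributedDataParallel with find_unused_parameters=True, but every parameter received a gradient, so the extra autograd-graph walk is pure overhead. A minimal, self-contained sketch of the setting the warning refers to (single-process gloo group so it runs anywhere; ESPnet's actual trainer wiring is not shown in this log):

    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP

    # One-process group just for illustration; real jobs get rank/world size
    # from the launcher.
    dist.init_process_group("gloo", init_method="tcp://127.0.0.1:29500",
                            rank=0, world_size=1)

    model = torch.nn.Linear(8, 4)
    # find_unused_parameters=True makes DDP traverse the autograd graph every
    # iteration looking for parameters that got no gradient; the warning says
    # that search found nothing, so False would be cheaper here.
    ddp = DDP(model, find_unused_parameters=True)

    out = ddp(torch.randn(2, 8)).sum()
    out.backward()  # triggers the reducer pass the warning talks about
    dist.destroy_process_group()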
+[gpub001:0/64] 2023-07-14 13:38:31,549 (trainer:732) INFO: 49epoch:train:1-100batch: iter_time=1.254, forward_time=0.216, loss_ctc=75.424, loss_att=56.205, acc=0.707, loss=61.970, backward_time=1.042, grad_norm=126.161, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.157e-05, train_time=8.698 +[gpub001:0/64] 2023-07-14 13:40:47,428 (trainer:732) INFO: 49epoch:train:101-200batch: iter_time=1.324e-04, forward_time=0.143, loss_ctc=78.109, loss_att=58.369, acc=0.696, loss=64.291, backward_time=1.027, grad_norm=156.563, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.156e-05, train_time=2.718 +[gpub001:0/64] 2023-07-14 13:43:04,326 (trainer:732) INFO: 49epoch:train:201-300batch: iter_time=1.368e-04, forward_time=0.144, loss_ctc=71.274, loss_att=53.833, acc=0.706, loss=59.065, backward_time=1.034, grad_norm=117.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.156e-05, train_time=2.738 +[gpub001:0/64] 2023-07-14 13:45:19,908 (trainer:732) INFO: 49epoch:train:301-400batch: iter_time=1.152e-04, forward_time=0.140, loss_ctc=82.935, loss_att=67.130, acc=0.686, loss=71.872, backward_time=1.023, grad_norm=143.181, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.155e-05, train_time=2.711 +[gpub001:0/64] 2023-07-14 13:47:38,398 (trainer:732) INFO: 49epoch:train:401-500batch: iter_time=1.057e-04, forward_time=0.140, loss_ctc=67.558, loss_att=49.800, acc=0.725, loss=55.128, backward_time=1.028, grad_norm=137.364, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.155e-05, train_time=2.770 +[gpub001:0/64] 2023-07-14 13:49:52,942 (trainer:732) INFO: 49epoch:train:501-600batch: iter_time=1.077e-04, forward_time=0.139, loss_ctc=67.201, loss_att=46.260, acc=0.720, loss=52.542, backward_time=1.019, grad_norm=114.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.154e-05, train_time=2.691 +[gpub001:0/64] 2023-07-14 13:52:16,838 (trainer:732) INFO: 49epoch:train:601-700batch: iter_time=1.205e-04, forward_time=0.142, loss_ctc=70.668, loss_att=51.546, acc=0.714, loss=57.283, backward_time=1.035, grad_norm=119.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.154e-05, train_time=2.878 +[gpub001:0/64] 2023-07-14 13:54:35,492 (trainer:732) INFO: 49epoch:train:701-800batch: iter_time=1.298e-04, forward_time=0.142, loss_ctc=61.423, loss_att=43.823, acc=0.717, loss=49.103, backward_time=1.026, grad_norm=107.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.153e-05, train_time=2.773 +[gpub001:0/64] 2023-07-14 13:55:27,347 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
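In the trainer records above, loss is the hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att. The weight is not printed in this log, but w = 0.3 reproduces the reported values to rounding; a quick check, assuming that weight (inferred from the numbers, not read from the config file):

    # Recompute the logged `loss` from `loss_ctc` and `loss_att` for the first
    # two 100-batch windows, assuming ctc_weight = 0.3 (an inference, not a
    # quoted config value).
    ctc_weight = 0.3
    for loss_ctc, loss_att, logged in [(75.424, 56.205, 61.970),
                                       (78.109, 58.369, 64.291)]:
        loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
        print(f"computed={loss:.3f} logged={logged:.3f}")
    # -> computed=61.971 logged=61.970 (rounding) and computed=64.291 logged=64.291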
+[gpub001:0/64] 2023-07-14 13:55:45,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 13:55:48,387 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 13:55:48,388 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-14 13:55:48,394 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 14:01:54,878 (trainer:732) INFO: 49epoch:train:801-900batch: iter_time=1.305, forward_time=0.165, loss_ctc=83.766, loss_att=63.502, acc=0.709, loss=69.582, backward_time=1.037, grad_norm=161.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.153e-05, train_time=8.787 +[gpub001:0/64] 2023-07-14 14:04:11,115 (trainer:732) INFO: 49epoch:train:901-1000batch: iter_time=1.176e-04, forward_time=0.143, loss_ctc=74.996, loss_att=53.416, acc=0.702, loss=59.890, backward_time=1.026, grad_norm=131.720, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.152e-05, train_time=2.725 +[gpub001:0/64] 2023-07-14 14:06:26,981 (trainer:732) INFO: 49epoch:train:1001-1100batch: iter_time=1.237e-04, forward_time=0.143, loss_ctc=71.753, loss_att=56.326, acc=0.702, loss=60.954, backward_time=1.025, grad_norm=130.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.152e-05, train_time=2.717 +[gpub001:0/64] 2023-07-14 14:08:43,048 (trainer:732) INFO: 49epoch:train:1101-1200batch: iter_time=1.122e-04, forward_time=0.142, loss_ctc=79.119, loss_att=61.363, acc=0.706, loss=66.690, backward_time=1.027, grad_norm=115.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.151e-05, train_time=2.721 +[gpub001:0/64] 2023-07-14 14:10:58,405 (trainer:732) INFO: 49epoch:train:1201-1300batch: iter_time=1.299e-04, forward_time=0.143, loss_ctc=72.908, loss_att=53.208, acc=0.712, loss=59.118, backward_time=1.023, grad_norm=115.476, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.150e-05, train_time=2.707 +[gpub001:0/64] 2023-07-14 14:13:13,436 (trainer:732) INFO: 49epoch:train:1301-1400batch: iter_time=1.351e-04, forward_time=0.142, loss_ctc=61.698, loss_att=43.601, acc=0.720, loss=49.030, backward_time=1.022, grad_norm=118.028, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.150e-05, train_time=2.700 +[gpub001:0/64] 2023-07-14 14:15:28,715 (trainer:732) INFO: 49epoch:train:1401-1500batch: iter_time=1.272e-04, forward_time=0.143, loss_ctc=69.799, loss_att=51.631, acc=0.718, loss=57.082, backward_time=1.023, grad_norm=137.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.149e-05, train_time=2.705 +[gpub001:0/64] 2023-07-14 14:17:44,041 (trainer:732) INFO: 49epoch:train:1501-1600batch: iter_time=1.310e-04, forward_time=0.143, loss_ctc=62.191, loss_att=43.167, acc=0.720, loss=48.874, backward_time=1.024, grad_norm=110.624, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, 
optim0_lr0=5.149e-05, train_time=2.706 +[gpub001:0/64] 2023-07-14 14:19:28,411 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub001:0/64] 2023-07-14 14:19:46,106 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 14:19:49,464 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 14:19:49,464 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-14 14:19:49,518 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 14:24:53,336 (trainer:732) INFO: 49epoch:train:1601-1700batch: iter_time=2.398, forward_time=0.158, loss_ctc=86.888, loss_att=63.993, acc=0.702, loss=70.861, backward_time=1.039, grad_norm=144.375, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.148e-05, train_time=8.586 +[gpub001:0/64] 2023-07-14 14:27:11,824 (trainer:732) INFO: 49epoch:train:1701-1800batch: iter_time=1.060e-04, forward_time=0.144, loss_ctc=72.288, loss_att=55.796, acc=0.708, loss=60.743, backward_time=1.032, grad_norm=127.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.148e-05, train_time=2.770 +[gpub001:0/64] 2023-07-14 14:29:27,834 (trainer:732) INFO: 49epoch:train:1801-1900batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=72.461, loss_att=50.750, acc=0.721, loss=57.264, backward_time=1.026, grad_norm=124.107, clip=100.000, loss_scale=5.127e+32, optim_step_time=0.181, optim0_lr0=5.147e-05, train_time=2.720 +[gpub001:0/64] 2023-07-14 14:31:43,989 (trainer:732) INFO: 49epoch:train:1901-2000batch: iter_time=1.183e-04, forward_time=0.144, loss_ctc=77.931, loss_att=60.068, acc=0.711, loss=65.427, backward_time=1.028, grad_norm=134.506, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.147e-05, train_time=2.723 +[gpub001:0/64] 2023-07-14 14:34:00,002 (trainer:732) INFO: 49epoch:train:2001-2100batch: iter_time=1.221e-04, forward_time=0.144, loss_ctc=74.295, loss_att=58.950, acc=0.728, loss=63.554, backward_time=1.026, grad_norm=139.921, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.146e-05, train_time=2.720 +[gpub001:0/64] 2023-07-14 14:35:34,874 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
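The "grad norm is nan. Skipping updating the model" warning, together with the very large loss_scale values (3.245e+32 rising to 6.490e+32, then dropping to 5.497e+32 right after the skip), is the usual signature of mixed-precision training: the gradient scaler grows its scale until an overflow produces inf/nan gradients, the optimizer step is skipped, and the scale backs off. ESPnet's trainer implements its own variant of this check; a generic sketch of the same mechanism with torch.cuda.amp, under those assumptions:

    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    amp_dtype = torch.float16 if device == "cuda" else torch.bfloat16

    model = torch.nn.Linear(16, 16).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=1e-4)
    # The scaler multiplies the loss by loss_scale before backward; values like
    # 3.245e+32 in the log are this scale, not the loss itself.
    scaler = torch.cuda.amp.GradScaler(enabled=(device == "cuda"))

    x = torch.randn(4, 16, device=device)
    with torch.autocast(device_type=device, dtype=amp_dtype):
        loss = model(x).pow(2).mean()

    opt.zero_grad()
    scaler.scale(loss).backward()
    # step() unscales the gradients and checks them: if any is inf/nan the
    # optimizer step is skipped, and update() then lowers the scale -- the
    # "Skipping updating the model" case in the log.
    scaler.step(opt)
    scaler.update()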
+[gpub001:0/64] 2023-07-14 14:36:15,530 (trainer:732) INFO: 49epoch:train:2101-2200batch: iter_time=1.218e-04, forward_time=0.144, loss_ctc=66.147, loss_att=47.940, acc=0.731, loss=53.402, backward_time=1.025, grad_norm=127.949, clip=100.000, loss_scale=5.497e+32, optim_step_time=0.180, optim0_lr0=5.146e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 14:38:31,109 (trainer:732) INFO: 49epoch:train:2201-2300batch: iter_time=1.115e-04, forward_time=0.144, loss_ctc=63.495, loss_att=43.732, acc=0.732, loss=49.661, backward_time=1.025, grad_norm=107.717, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.145e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 14:40:46,863 (trainer:732) INFO: 49epoch:train:2301-2400batch: iter_time=1.134e-04, forward_time=0.144, loss_ctc=70.905, loss_att=53.186, acc=0.722, loss=58.502, backward_time=1.025, grad_norm=119.707, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.144e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 14:43:02,330 (trainer:732) INFO: 49epoch:train:2401-2500batch: iter_time=1.097e-04, forward_time=0.143, loss_ctc=71.274, loss_att=49.885, acc=0.724, loss=56.302, backward_time=1.024, grad_norm=140.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.144e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 14:43:03,512 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-14 14:43:21,506 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 14:43:24,979 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 14:43:24,979 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-14 14:43:24,985 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 14:50:14,455 (trainer:732) INFO: 49epoch:train:2501-2600batch: iter_time=1.236, forward_time=0.144, loss_ctc=77.943, loss_att=56.932, acc=0.709, loss=63.235, backward_time=1.048, grad_norm=215.635, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.143e-05, train_time=8.642
+[gpub001:0/64] 2023-07-14 14:52:30,604 (trainer:732) INFO: 49epoch:train:2601-2700batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=76.520, loss_att=56.178, acc=0.714, loss=62.281, backward_time=1.026, grad_norm=138.030, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.143e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:54:46,489 (trainer:732) INFO: 49epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=70.928, loss_att=50.393, acc=0.725, loss=56.554, backward_time=1.025, grad_norm=113.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.142e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 14:57:02,630 (trainer:732) INFO: 49epoch:train:2801-2900batch: iter_time=1.260e-04, forward_time=0.145, loss_ctc=80.053, loss_att=64.710, acc=0.711, loss=69.313, backward_time=1.028, grad_norm=130.479, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.142e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:59:17,960 (trainer:732) INFO: 49epoch:train:2901-3000batch: iter_time=1.280e-04, forward_time=0.143, loss_ctc=67.583, loss_att=50.322, acc=0.734, loss=55.500, backward_time=1.023, grad_norm=118.809, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.141e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:01:33,622 (trainer:732) INFO: 49epoch:train:3001-3100batch: iter_time=1.303e-04, forward_time=0.146, loss_ctc=64.848, loss_att=44.323, acc=0.737, loss=50.480, backward_time=1.024, grad_norm=131.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.141e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 15:03:48,945 (trainer:732) INFO: 49epoch:train:3101-3200batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=68.493, loss_att=50.724, acc=0.731, loss=56.054, backward_time=1.022, grad_norm=139.131, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.140e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:06:04,272 (trainer:732) INFO: 49epoch:train:3201-3300batch: iter_time=1.350e-04, forward_time=0.144, loss_ctc=62.583, loss_att=44.592, acc=0.725, loss=49.990, backward_time=1.023, grad_norm=116.550, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.140e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:06:50,474 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-14 15:07:08,807 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:07:12,196 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:07:12,197 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-14 15:07:12,203 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 15:12:51,478 (trainer:732) INFO: 49epoch:train:3301-3400batch: iter_time=1.286, forward_time=0.144, loss_ctc=82.374, loss_att=58.429, acc=0.718, loss=65.612, backward_time=1.042, grad_norm=161.884, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.139e-05, train_time=8.144
+[gpub001:0/64] 2023-07-14 15:15:09,454 (trainer:732) INFO: 49epoch:train:3401-3500batch: iter_time=1.225e-04, forward_time=0.143, loss_ctc=72.824, loss_att=56.074, acc=0.705, loss=61.099, backward_time=1.027, grad_norm=124.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.138e-05, train_time=2.759
+[gpub001:0/64] 2023-07-14 15:16:18,542 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 15:17:26,359 (trainer:732) INFO: 49epoch:train:3501-3600batch: iter_time=1.224e-04, forward_time=0.144, loss_ctc=70.789, loss_att=51.064, acc=0.720, loss=56.982, backward_time=1.025, grad_norm=127.943, clip=100.000, loss_scale=2.417e+32, optim_step_time=0.180, optim0_lr0=5.138e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 15:19:44,415 (trainer:732) INFO: 49epoch:train:3601-3700batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=81.997, loss_att=65.961, acc=0.690, loss=70.771, backward_time=1.027, grad_norm=125.683, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.137e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 15:22:04,902 (trainer:732) INFO: 49epoch:train:3701-3800batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=68.446, loss_att=49.301, acc=0.730, loss=55.044, backward_time=1.028, grad_norm=115.877, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.137e-05, train_time=2.810
+[gpub001:0/64] 2023-07-14 15:24:23,287 (trainer:732) INFO: 49epoch:train:3801-3900batch: iter_time=1.214e-04, forward_time=0.144, loss_ctc=70.058, loss_att=52.734, acc=0.711, loss=57.931, backward_time=1.024, grad_norm=135.739, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.136e-05, train_time=2.767
+[gpub001:0/64] 2023-07-14 15:26:40,552 (trainer:732) INFO: 49epoch:train:3901-4000batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=63.341, loss_att=45.010, acc=0.729, loss=50.510, backward_time=1.027, grad_norm=115.671, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.136e-05, train_time=2.745
+[gpub001:0/64] 2023-07-14 15:28:56,692 (trainer:732) INFO: 49epoch:train:4001-4100batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=67.558, loss_att=49.060, acc=0.721, loss=54.609, backward_time=1.024, grad_norm=113.122, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.135e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 15:30:27,503 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-14 15:30:45,587 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:30:49,041 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:30:49,041 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-14 15:30:49,047 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 15:35:52,270 (trainer:732) INFO: 49epoch:train:4101-4200batch: iter_time=1.261, forward_time=0.144, loss_ctc=70.221, loss_att=53.085, acc=0.717, loss=58.226, backward_time=1.035, grad_norm=140.521, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.135e-05, train_time=8.311
+[gpub001:0/64] 2023-07-14 15:38:08,843 (trainer:732) INFO: 49epoch:train:4201-4300batch: iter_time=1.161e-04, forward_time=0.144, loss_ctc=73.382, loss_att=57.566, acc=0.707, loss=62.311, backward_time=1.029, grad_norm=116.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.134e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 15:40:24,503 (trainer:732) INFO: 49epoch:train:4301-4400batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=72.437, loss_att=50.993, acc=0.718, loss=57.426, backward_time=1.025, grad_norm=135.729, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.134e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 15:42:39,948 (trainer:732) INFO: 49epoch:train:4401-4500batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=79.101, loss_att=66.083, acc=0.685, loss=69.989, backward_time=1.023, grad_norm=131.828, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.133e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 15:44:55,456 (trainer:732) INFO: 49epoch:train:4501-4600batch: iter_time=1.249e-04, forward_time=0.143, loss_ctc=67.710, loss_att=48.912, acc=0.733, loss=54.551, backward_time=1.023, grad_norm=151.097, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.133e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 15:47:10,473 (trainer:732) INFO: 49epoch:train:4601-4700batch: iter_time=1.201e-04, forward_time=0.142, loss_ctc=67.960, loss_att=47.417, acc=0.714, loss=53.580, backward_time=1.021, grad_norm=139.328, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.132e-05, train_time=2.700
+[gpub001:0/64] 2023-07-14 15:49:28,348 (trainer:732) INFO: 49epoch:train:4701-4800batch: iter_time=1.428e-04, forward_time=0.143, loss_ctc=66.335, loss_att=46.579, acc=0.730, loss=52.506, backward_time=1.025, grad_norm=121.960, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.131e-05, train_time=2.757
+[gpub001:0/64] 2023-07-14 15:51:43,745 (trainer:732) INFO: 49epoch:train:4801-4900batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=65.075, loss_att=47.591, acc=0.717, loss=52.836, backward_time=1.024, grad_norm=142.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.131e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 15:53:59,339 (trainer:732) INFO: 49epoch:train:4901-5000batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=76.840, loss_att=55.520, acc=0.720, loss=61.916, backward_time=1.025, grad_norm=146.307, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.130e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 15:54:01,002 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-14 15:54:18,910 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:54:22,671 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:54:22,671 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-14 15:54:22,677 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:02:02,572 (trainer:732) INFO: 49epoch:train:5001-5100batch: iter_time=1.246, forward_time=0.173, loss_ctc=70.620, loss_att=55.070, acc=0.710, loss=59.735, backward_time=1.102, grad_norm=143.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.130e-05, train_time=9.664
+[gpub001:0/64] 2023-07-14 16:05:05,429 (trainer:732) INFO: 49epoch:train:5101-5200batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=74.968, loss_att=53.708, acc=0.725, loss=60.086, backward_time=1.111, grad_norm=140.062, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.129e-05, train_time=3.657
+[gpub001:0/64] 2023-07-14 16:07:39,205 (trainer:732) INFO: 49epoch:train:5201-5300batch: iter_time=1.230e-04, forward_time=0.143, loss_ctc=78.360, loss_att=62.863, acc=0.701, loss=67.512, backward_time=1.045, grad_norm=129.656, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.129e-05, train_time=3.075
+[gpub001:0/64] 2023-07-14 16:10:30,819 (trainer:732) INFO: 49epoch:train:5301-5400batch: iter_time=1.198e-04, forward_time=0.143, loss_ctc=70.380, loss_att=52.634, acc=0.739, loss=57.958, backward_time=1.065, grad_norm=145.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.179, optim0_lr0=5.128e-05, train_time=3.432
+[gpub001:0/64] 2023-07-14 16:13:06,699 (trainer:732) INFO: 49epoch:train:5401-5500batch: iter_time=1.248e-04, forward_time=0.144, loss_ctc=66.159, loss_att=49.037, acc=0.727, loss=54.174, backward_time=1.050, grad_norm=127.077, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.128e-05, train_time=3.117
+[gpub001:0/64] 2023-07-14 16:15:41,679 (trainer:732) INFO: 49epoch:train:5501-5600batch: iter_time=1.175e-04, forward_time=0.143, loss_ctc=63.480, loss_att=42.575, acc=0.741, loss=48.847, backward_time=1.045, grad_norm=114.198, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.127e-05, train_time=3.099
+[gpub001:0/64] 2023-07-14 16:18:02,778 (trainer:732) INFO: 49epoch:train:5601-5700batch: iter_time=1.207e-04, forward_time=0.143, loss_ctc=71.660, loss_att=53.814, acc=0.721, loss=59.168, backward_time=1.031, grad_norm=134.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.127e-05, train_time=2.822
+[gpub001:0/64] 2023-07-14 16:20:31,593 (trainer:732) INFO: 49epoch:train:5701-5800batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=71.301, loss_att=52.766, acc=0.725, loss=58.326, backward_time=1.041, grad_norm=137.471, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.126e-05, train_time=2.976
+[gpub001:0/64] 2023-07-14 16:21:30,492 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-14 16:21:48,619 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 16:21:52,043 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 16:21:52,043 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 16:21:52,049 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:27:57,645 (trainer:732) INFO: 49epoch:train:5801-5900batch: iter_time=1.522, forward_time=0.161, loss_ctc=73.115, loss_att=49.116, acc=0.724, loss=56.316, backward_time=1.040, grad_norm=136.046, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.126e-05, train_time=8.921
+[gpub001:0/64] 2023-07-14 16:30:13,954 (trainer:732) INFO: 49epoch:train:5901-6000batch: iter_time=1.296e-04, forward_time=0.144, loss_ctc=71.267, loss_att=55.700, acc=0.705, loss=60.370, backward_time=1.026, grad_norm=123.436, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.125e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 16:32:30,026 (trainer:732) INFO: 49epoch:train:6001-6100batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=70.944, loss_att=50.849, acc=0.722, loss=56.877, backward_time=1.022, grad_norm=205.768, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.124e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 16:34:45,468 (trainer:732) INFO: 49epoch:train:6101-6200batch: iter_time=1.363e-04, forward_time=0.144, loss_ctc=80.848, loss_att=65.774, acc=0.690, loss=70.296, backward_time=1.024, grad_norm=189.035, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.124e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 16:37:00,897 (trainer:732) INFO: 49epoch:train:6201-6300batch: iter_time=1.565e-04, forward_time=0.144, loss_ctc=68.282, loss_att=48.857, acc=0.734, loss=54.685, backward_time=1.025, grad_norm=112.677, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.123e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 16:39:16,422 (trainer:732) INFO: 49epoch:train:6301-6400batch: iter_time=1.438e-04, forward_time=0.144, loss_ctc=69.857, loss_att=51.306, acc=0.719, loss=56.871, backward_time=1.024, grad_norm=136.182, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.123e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 16:41:31,597 (trainer:732) INFO: 49epoch:train:6401-6500batch: iter_time=1.378e-04, forward_time=0.144, loss_ctc=62.547, loss_att=44.512, acc=0.727, loss=49.923, backward_time=1.022, grad_norm=115.827, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.122e-05, train_time=2.703
+[gpub001:0/64] 2023-07-14 16:43:46,948 (trainer:732) INFO: 49epoch:train:6501-6600batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=68.154, loss_att=48.823, acc=0.723, loss=54.622, backward_time=1.024, grad_norm=127.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.122e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 16:45:29,079 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-14 16:45:47,237 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 16:45:50,693 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 16:45:50,693 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-14 16:45:50,699 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:49:39,620 (trainer:732) INFO: 49epoch:train:6601-6700batch: iter_time=2.079, forward_time=0.183, loss_ctc=76.672, loss_att=56.369, acc=0.715, loss=62.460, backward_time=1.034, grad_norm=132.969, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.121e-05, train_time=7.053
+[gpub001:0/64] 2023-07-14 16:51:56,314 (trainer:732) INFO: 49epoch:train:6701-6800batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=70.489, loss_att=56.214, acc=0.716, loss=60.496, backward_time=1.027, grad_norm=152.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.121e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 16:54:12,308 (trainer:732) INFO: 49epoch:train:6801-6900batch: iter_time=1.133e-04, forward_time=0.143, loss_ctc=71.773, loss_att=49.137, acc=0.730, loss=55.928, backward_time=1.027, grad_norm=116.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.120e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 16:56:28,064 (trainer:732) INFO: 49epoch:train:6901-7000batch: iter_time=1.082e-04, forward_time=0.145, loss_ctc=76.239, loss_att=59.077, acc=0.716, loss=64.225, backward_time=1.026, grad_norm=122.696, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.120e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 16:58:43,796 (trainer:732) INFO: 49epoch:train:7001-7100batch: iter_time=1.142e-04, forward_time=0.144, loss_ctc=73.758, loss_att=58.472, acc=0.731, loss=63.058, backward_time=1.026, grad_norm=119.532, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.119e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 17:00:59,084 (trainer:732) INFO: 49epoch:train:7101-7200batch: iter_time=1.003e-04, forward_time=0.142, loss_ctc=67.069, loss_att=48.465, acc=0.731, loss=54.046, backward_time=1.023, grad_norm=124.919, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.119e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 17:03:14,636 (trainer:732) INFO: 49epoch:train:7201-7300batch: iter_time=9.950e-05, forward_time=0.144, loss_ctc=63.394, loss_att=43.420, acc=0.735, loss=49.412, backward_time=1.024, grad_norm=118.629, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.179, optim0_lr0=5.118e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 17:05:30,294 (trainer:732) INFO: 49epoch:train:7301-7400batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=70.573, loss_att=52.037, acc=0.731, loss=57.598, backward_time=1.025, grad_norm=138.691, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.117e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 17:07:45,765 (trainer:732) INFO: 49epoch:train:7401-7500batch: iter_time=9.254e-05, forward_time=0.144, loss_ctc=67.756, loss_att=48.815, acc=0.728, loss=54.498, backward_time=1.025, grad_norm=152.362, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.117e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 17:07:47,423 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-14 17:08:05,615 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:08:09,029 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:08:09,029 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-14 17:08:09,035 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 17:15:00,221 (trainer:732) INFO: 49epoch:train:7501-7600batch: iter_time=1.254, forward_time=0.144, loss_ctc=73.840, loss_att=54.828, acc=0.713, loss=60.532, backward_time=1.035, grad_norm=126.378, clip=100.000, loss_scale=2.434e+32, optim_step_time=0.180, optim0_lr0=5.116e-05, train_time=8.689
+[gpub001:0/64] 2023-07-14 17:17:16,603 (trainer:732) INFO: 49epoch:train:7601-7700batch: iter_time=1.198e-04, forward_time=0.144, loss_ctc=73.851, loss_att=54.824, acc=0.712, loss=60.532, backward_time=1.027, grad_norm=149.921, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.116e-05, train_time=2.727
+[gpub001:0/64] 2023-07-14 17:19:32,240 (trainer:732) INFO: 49epoch:train:7701-7800batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=69.949, loss_att=52.432, acc=0.715, loss=57.687, backward_time=1.024, grad_norm=117.932, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.115e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 17:21:47,986 (trainer:732) INFO: 49epoch:train:7801-7900batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=80.270, loss_att=64.561, acc=0.698, loss=69.274, backward_time=1.027, grad_norm=140.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.115e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 17:24:03,451 (trainer:732) INFO: 49epoch:train:7901-8000batch: iter_time=1.315e-04, forward_time=0.145, loss_ctc=65.982, loss_att=48.615, acc=0.731, loss=53.825, backward_time=1.024, grad_norm=143.731, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.114e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 17:26:18,815 (trainer:732) INFO: 49epoch:train:8001-8100batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=63.870, loss_att=44.066, acc=0.731, loss=50.007, backward_time=1.023, grad_norm=124.882, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.114e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 17:28:34,027 (trainer:732) INFO: 49epoch:train:8101-8200batch: iter_time=1.181e-04, forward_time=0.144, loss_ctc=68.490, loss_att=50.287, acc=0.723, loss=55.748, backward_time=1.021, grad_norm=116.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.113e-05, train_time=2.704
+[gpub001:0/64] 2023-07-14 17:30:49,344 (trainer:732) INFO: 49epoch:train:8201-8300batch: iter_time=1.236e-04, forward_time=0.145, loss_ctc=60.268, loss_att=42.861, acc=0.725, loss=48.083, backward_time=1.021, grad_norm=105.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.113e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 17:31:35,992 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-14 17:31:53,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:31:57,440 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:31:57,440 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-14 17:31:57,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 17:38:13,424 (trainer:732) INFO: 49epoch:train:8301-8400batch: iter_time=1.214, forward_time=0.154, loss_ctc=81.413, loss_att=59.604, acc=0.719, loss=66.147, backward_time=1.043, grad_norm=156.011, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.112e-05, train_time=8.881
+[gpub001:0/64] 2023-07-14 17:40:30,027 (trainer:732) INFO: 49epoch:train:8401-8500batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=72.136, loss_att=55.373, acc=0.718, loss=60.402, backward_time=1.026, grad_norm=133.755, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.112e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 17:42:45,754 (trainer:732) INFO: 49epoch:train:8501-8600batch: iter_time=1.202e-04, forward_time=0.145, loss_ctc=70.343, loss_att=50.701, acc=0.727, loss=56.593, backward_time=1.028, grad_norm=121.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.111e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 17:45:01,816 (trainer:732) INFO: 49epoch:train:8601-8700batch: iter_time=1.256e-04, forward_time=0.146, loss_ctc=80.448, loss_att=64.353, acc=0.708, loss=69.182, backward_time=1.028, grad_norm=135.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.111e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 17:47:20,645 (trainer:732) INFO: 49epoch:train:8701-8800batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=67.178, loss_att=48.931, acc=0.743, loss=54.405, backward_time=1.028, grad_norm=142.495, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.110e-05, train_time=2.776
+[gpub001:0/64] 2023-07-14 17:49:37,238 (trainer:732) INFO: 49epoch:train:8801-8900batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=68.598, loss_att=50.099, acc=0.731, loss=55.649, backward_time=1.027, grad_norm=124.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.109e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 17:51:56,378 (trainer:732) INFO: 49epoch:train:8901-9000batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=62.040, loss_att=43.423, acc=0.740, loss=49.008, backward_time=1.025, grad_norm=114.553, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.109e-05, train_time=2.783
+[gpub001:0/64] 2023-07-14 17:54:11,781 (trainer:732) INFO: 49epoch:train:9001-9100batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=67.395, loss_att=49.894, acc=0.729, loss=55.144, backward_time=1.022, grad_norm=128.721, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.108e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 17:56:00,273 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-14 17:56:18,139 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:56:21,608 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:56:21,608 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-14 17:56:21,615 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 18:01:26,368 (trainer:732) INFO: 49epoch:train:9101-9200batch: iter_time=1.617, forward_time=0.164, loss_ctc=69.323, loss_att=50.650, acc=0.726, loss=56.252, backward_time=1.035, grad_norm=140.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.108e-05, train_time=8.692
+[gpub001:0/64] 2023-07-14 18:03:44,759 (trainer:732) INFO: 49epoch:train:9201-9300batch: iter_time=1.228e-04, forward_time=0.147, loss_ctc=73.686, loss_att=57.660, acc=0.717, loss=62.468, backward_time=1.032, grad_norm=120.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.107e-05, train_time=2.768
+[gpub001:0/64] 2023-07-14 18:06:01,753 (trainer:732) INFO: 49epoch:train:9301-9400batch: iter_time=1.486e-04, forward_time=0.145, loss_ctc=71.589, loss_att=48.903, acc=0.728, loss=55.709, backward_time=1.025, grad_norm=119.501, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.107e-05, train_time=2.740
+[gpub001:0/64] 2023-07-14 18:08:19,922 (trainer:732) INFO: 49epoch:train:9401-9500batch: iter_time=1.301e-04, forward_time=0.144, loss_ctc=80.193, loss_att=65.513, acc=0.702, loss=69.917, backward_time=1.031, grad_norm=140.511, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.106e-05, train_time=2.763
+[gpub001:0/64] 2023-07-14 18:10:37,699 (trainer:732) INFO: 49epoch:train:9501-9600batch: iter_time=1.212e-04, forward_time=0.144, loss_ctc=67.328, loss_att=48.700, acc=0.744, loss=54.289, backward_time=1.027, grad_norm=128.383, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.106e-05, train_time=2.755
+[gpub001:0/64] 2023-07-14 18:12:55,391 (trainer:732) INFO: 49epoch:train:9601-9700batch: iter_time=1.316e-04, forward_time=0.143, loss_ctc=65.278, loss_att=46.806, acc=0.726, loss=52.347, backward_time=1.026, grad_norm=135.251, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.105e-05, train_time=2.754
+[gpub001:0/64] 2023-07-14 18:15:15,142 (trainer:732) INFO: 49epoch:train:9701-9800batch: iter_time=1.367e-04, forward_time=0.145, loss_ctc=65.510, loss_att=45.641, acc=0.741, loss=51.602, backward_time=1.034, grad_norm=115.004, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.105e-05, train_time=2.795
+[gpub001:0/64] 2023-07-14 18:17:33,807 (trainer:732) INFO: 49epoch:train:9801-9900batch: iter_time=1.295e-04, forward_time=0.144, loss_ctc=64.369, loss_att=47.790, acc=0.723, loss=52.764, backward_time=1.027, grad_norm=108.677, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.104e-05, train_time=2.773
+[gpub001:0/64] 2023-07-14 18:19:49,329 (trainer:732) INFO: 49epoch:train:9901-10000batch: iter_time=1.004e-04, forward_time=0.144, loss_ctc=76.087, loss_att=54.280, acc=0.724, loss=60.822, backward_time=1.024, grad_norm=120.371, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.104e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 18:33:02,493 (trainer:338) INFO: 49epoch results: [train] iter_time=0.177, forward_time=0.146, loss_ctc=71.210, loss_att=52.672, acc=0.719, loss=58.234, backward_time=1.030, grad_norm=131.679, clip=100.000, loss_scale=2.702e+32, optim_step_time=0.180, optim0_lr0=5.130e-05, train_time=3.462, time=4 hours, 48 minutes and 48.83 seconds, total_count=460000, gpu_max_cached_mem_GB=34.336, [valid] loss_ctc=43.418, cer_ctc=0.254, loss_att=37.707, acc=0.674, cer=0.423, wer=0.998, loss=39.421, time=7 minutes and 3.51 seconds, total_count=47058, gpu_max_cached_mem_GB=37.631, [att_plot] time=5 minutes and 53.56 seconds, total_count=0, gpu_max_cached_mem_GB=37.631
+[gpub001:0/64] 2023-07-14 18:33:18,474 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-14 18:33:18,484 (trainer:272) INFO: 50/60epoch started. Estimated time to finish: 2 days, 7 hours and 22 minutes
+[gpub001:0/64] 2023-07-14 18:33:18,487 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-14 18:33:35,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 18:33:39,090 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 18:33:39,090 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-14 18:33:39,096 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 18:37:48,677 (trainer:732) INFO: 50epoch:train:1-100batch: iter_time=1.165, forward_time=0.188, loss_ctc=76.860, loss_att=56.022, acc=0.704, loss=62.273, backward_time=1.063, grad_norm=182.532, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.103e-05, train_time=5.403
+[gpub001:0/64] 2023-07-14 18:40:12,390 (trainer:732) INFO: 50epoch:train:101-200batch: iter_time=9.433e-05, forward_time=0.179, loss_ctc=63.633, loss_att=45.042, acc=0.731, loss=50.619, backward_time=1.036, grad_norm=138.710, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.103e-05, train_time=2.872
+[gpub001:0/64] 2023-07-14 18:42:28,865 (trainer:732) INFO: 50epoch:train:201-300batch: iter_time=9.529e-05, forward_time=0.145, loss_ctc=66.674, loss_att=52.287, acc=0.720, loss=56.603, backward_time=1.031, grad_norm=131.160, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.102e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 18:44:55,125 (trainer:732) INFO: 50epoch:train:301-400batch: iter_time=9.373e-05, forward_time=0.144, loss_ctc=70.708, loss_att=47.914, acc=0.725, loss=54.752, backward_time=1.038, grad_norm=128.284, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.101e-05, train_time=2.925
+[gpub001:0/64] 2023-07-14 18:47:16,114 (trainer:732) INFO: 50epoch:train:401-500batch: iter_time=9.543e-05, forward_time=0.145, loss_ctc=76.756, loss_att=56.886, acc=0.714, loss=62.847, backward_time=1.034, grad_norm=131.145, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.101e-05, train_time=2.820
+[gpub001:0/64] 2023-07-14 18:49:35,575 (trainer:732) INFO: 50epoch:train:501-600batch: iter_time=9.838e-05, forward_time=0.144, loss_ctc=66.678, loss_att=50.737, acc=0.720, loss=55.519, backward_time=1.034, grad_norm=115.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.100e-05, train_time=2.789
+[gpub001:0/64] 2023-07-14 18:52:04,139 (trainer:732) INFO: 50epoch:train:601-700batch: iter_time=9.636e-05, forward_time=0.157, loss_ctc=66.833, loss_att=48.220, acc=0.716, loss=53.804, backward_time=1.050, grad_norm=122.552, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.100e-05, train_time=2.971
+[gpub001:0/64] 2023-07-14 18:54:29,918 (trainer:732) INFO: 50epoch:train:701-800batch: iter_time=9.484e-05, forward_time=0.144, loss_ctc=71.313, loss_att=46.886, acc=0.720, loss=54.214, backward_time=1.043, grad_norm=121.906, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.099e-05, train_time=2.915
+[gpub001:0/64] 2023-07-14 18:55:23,066 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-14 18:55:40,513 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 18:55:43,867 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 18:55:43,867 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-14 18:55:43,874 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:00:29,306 (trainer:732) INFO: 50epoch:train:801-900batch: iter_time=1.702, forward_time=0.164, loss_ctc=74.067, loss_att=54.822, acc=0.707, loss=60.595, backward_time=1.045, grad_norm=162.733, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.099e-05, train_time=7.188
+[gpub001:0/64] 2023-07-14 19:02:46,018 (trainer:732) INFO: 50epoch:train:901-1000batch: iter_time=1.192e-04, forward_time=0.144, loss_ctc=63.885, loss_att=44.598, acc=0.731, loss=50.384, backward_time=1.029, grad_norm=110.262, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.098e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 19:05:02,005 (trainer:732) INFO: 50epoch:train:1001-1100batch: iter_time=1.114e-04, forward_time=0.145, loss_ctc=65.165, loss_att=50.965, acc=0.731, loss=55.225, backward_time=1.029, grad_norm=115.952, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.098e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 19:07:17,905 (trainer:732) INFO: 50epoch:train:1101-1200batch: iter_time=1.223e-04, forward_time=0.144, loss_ctc=72.477, loss_att=48.920, acc=0.727, loss=55.987, backward_time=1.029, grad_norm=135.909, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.097e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 19:09:33,663 (trainer:732) INFO: 50epoch:train:1201-1300batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=76.221, loss_att=57.505, acc=0.709, loss=63.120, backward_time=1.029, grad_norm=152.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.097e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 19:11:52,890 (trainer:732) INFO: 50epoch:train:1301-1400batch: iter_time=1.119e-04, forward_time=0.145, loss_ctc=64.913, loss_att=49.074, acc=0.717, loss=53.826, backward_time=1.032, grad_norm=143.678, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.096e-05, train_time=2.784
+[gpub001:0/64] 2023-07-14 19:14:08,777 (trainer:732) INFO: 50epoch:train:1401-1500batch: iter_time=1.097e-04, forward_time=0.145, loss_ctc=64.207, loss_att=46.300, acc=0.727, loss=51.672, backward_time=1.028, grad_norm=96.777, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.096e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 19:16:24,490 (trainer:732) INFO: 50epoch:train:1501-1600batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=72.579, loss_att=48.346, acc=0.712, loss=55.616, backward_time=1.026, grad_norm=117.818, clip=100.000, loss_scale=4.868e+32, optim_step_time=0.181, optim0_lr0=5.095e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 19:18:06,491 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-14 19:18:24,593 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 19:18:28,023 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 19:18:28,023 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-14 19:18:28,029 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:21:27,486 (trainer:732) INFO: 50epoch:train:1601-1700batch: iter_time=1.532, forward_time=0.145, loss_ctc=76.069, loss_att=57.854, acc=0.715, loss=63.318, backward_time=1.042, grad_norm=120.403, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.095e-05, train_time=6.060
+[gpub001:0/64] 2023-07-14 19:23:44,660 (trainer:732) INFO: 50epoch:train:1701-1800batch: iter_time=1.012e-04, forward_time=0.145, loss_ctc=69.245, loss_att=50.311, acc=0.700, loss=55.991, backward_time=1.031, grad_norm=146.320, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.094e-05, train_time=2.743
+[gpub001:0/64] 2023-07-14 19:26:00,439 (trainer:732) INFO: 50epoch:train:1801-1900batch: iter_time=1.160e-04, forward_time=0.144, loss_ctc=65.293, loss_att=47.043, acc=0.727, loss=52.518, backward_time=1.025, grad_norm=118.987, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.094e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 19:28:15,850 (trainer:732) INFO: 50epoch:train:1901-2000batch: iter_time=1.534e-04, forward_time=0.146, loss_ctc=64.296, loss_att=50.158, acc=0.720, loss=54.399, backward_time=1.025, grad_norm=125.395, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.093e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 19:30:31,788 (trainer:732) INFO: 50epoch:train:2001-2100batch: iter_time=1.528e-04, forward_time=0.147, loss_ctc=72.538, loss_att=52.328, acc=0.704, loss=58.391, backward_time=1.030, grad_norm=133.928, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.092e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 19:32:47,884 (trainer:732) INFO: 50epoch:train:2101-2200batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=73.571, loss_att=55.147, acc=0.713, loss=60.674, backward_time=1.031, grad_norm=116.254, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.092e-05, train_time=2.722
+[gpub001:0/64] 2023-07-14 19:35:03,746 (trainer:732) INFO: 50epoch:train:2201-2300batch: iter_time=1.165e-04, forward_time=0.146, loss_ctc=66.832, loss_att=48.835, acc=0.707, loss=54.234, backward_time=1.029, grad_norm=126.747, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.091e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 19:37:19,158 (trainer:732) INFO: 50epoch:train:2301-2400batch: iter_time=1.330e-04, forward_time=0.146, loss_ctc=69.922, loss_att=45.724, acc=0.719, loss=52.983, backward_time=1.027, grad_norm=121.655, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.091e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 19:39:35,125 (trainer:732) INFO: 50epoch:train:2401-2500batch: iter_time=1.500e-04, forward_time=0.147, loss_ctc=69.057, loss_att=51.498, acc=0.713, loss=56.766, backward_time=1.030, grad_norm=136.557, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.090e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 19:39:36,493 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-14 19:39:54,855 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 19:39:58,286 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 19:39:58,286 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 19:39:58,292 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:47:03,128 (trainer:732) INFO: 50epoch:train:2501-2600batch: iter_time=1.229, forward_time=0.146, loss_ctc=75.319, loss_att=54.368, acc=0.702, loss=60.653, backward_time=1.044, grad_norm=145.226, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.090e-05, train_time=8.960
+[gpub001:0/64] 2023-07-14 19:49:19,629 (trainer:732) INFO: 50epoch:train:2601-2700batch: iter_time=1.033e-04, forward_time=0.147, loss_ctc=62.652, loss_att=44.316, acc=0.729, loss=49.817, backward_time=1.029, grad_norm=163.995, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.089e-05, train_time=2.730
+[gpub001:0/64] 2023-07-14 19:51:35,460 (trainer:732) INFO: 50epoch:train:2701-2800batch: iter_time=1.076e-04, forward_time=0.145, loss_ctc=65.546, loss_att=51.752, acc=0.720, loss=55.890, backward_time=1.028, grad_norm=116.187, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.089e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 19:53:51,137 (trainer:732) INFO: 50epoch:train:2801-2900batch: iter_time=1.003e-04, forward_time=0.145, loss_ctc=69.677, loss_att=46.545, acc=0.724, loss=53.485, backward_time=1.027, grad_norm=151.273, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.088e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 19:55:55,895 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 19:56:06,946 (trainer:732) INFO: 50epoch:train:2901-3000batch: iter_time=1.015e-04, forward_time=0.145, loss_ctc=75.469, loss_att=56.637, acc=0.712, loss=62.287, backward_time=1.028, grad_norm=123.542, clip=100.000, loss_scale=6.225e+32, optim_step_time=0.181, optim0_lr0=5.088e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 19:58:23,795 (trainer:732) INFO: 50epoch:train:3001-3100batch: iter_time=9.923e-05, forward_time=0.145, loss_ctc=64.660, loss_att=50.088, acc=0.711, loss=54.459, backward_time=1.029, grad_norm=139.049, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.087e-05, train_time=2.737
+[gpub001:0/64] 2023-07-14 20:00:39,421 (trainer:732) INFO: 50epoch:train:3101-3200batch: iter_time=1.047e-04, forward_time=0.144, loss_ctc=65.956, loss_att=46.842, acc=0.716, loss=52.576, backward_time=1.026, grad_norm=112.318, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.087e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 20:02:55,138 (trainer:732) INFO: 50epoch:train:3201-3300batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=69.096, loss_att=46.225, acc=0.715, loss=53.086, backward_time=1.028, grad_norm=123.518, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.086e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 20:03:41,171 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-14 20:03:59,687 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:04:03,127 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:04:03,127 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-14 20:04:03,133 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:10:00,535 (trainer:732) INFO: 50epoch:train:3301-3400batch: iter_time=1.229, forward_time=0.207, loss_ctc=76.427, loss_att=55.751, acc=0.711, loss=61.954, backward_time=1.042, grad_norm=113.736, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.086e-05, train_time=8.507
+[gpub001:0/64] 2023-07-14 20:12:16,912 (trainer:732) INFO: 50epoch:train:3401-3500batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=67.057, loss_att=48.763, acc=0.708, loss=54.251, backward_time=1.030, grad_norm=130.479, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.085e-05, train_time=2.728
+[gpub001:0/64] 2023-07-14 20:14:32,971 (trainer:732) INFO: 50epoch:train:3501-3600batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=67.118, loss_att=49.572, acc=0.728, loss=54.835, backward_time=1.028, grad_norm=137.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.085e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 20:16:49,816 (trainer:732) INFO: 50epoch:train:3601-3700batch: iter_time=1.234e-04, forward_time=0.145, loss_ctc=65.072, loss_att=47.251, acc=0.722, loss=52.598, backward_time=1.026, grad_norm=124.680, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.084e-05, train_time=2.737
+[gpub001:0/64] 2023-07-14 20:19:06,520 (trainer:732) INFO: 50epoch:train:3701-3800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=68.645, loss_att=49.960, acc=0.716, loss=55.566, backward_time=1.026, grad_norm=119.968, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.084e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 20:21:22,393 (trainer:732) INFO: 50epoch:train:3801-3900batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=71.225, loss_att=52.976, acc=0.716, loss=58.451, backward_time=1.027, grad_norm=125.839, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.083e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 20:23:38,267 (trainer:732) INFO: 50epoch:train:3901-4000batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=67.296, loss_att=49.609, acc=0.708, loss=54.915, backward_time=1.027, grad_norm=122.521, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.082e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 20:25:53,904 (trainer:732) INFO: 50epoch:train:4001-4100batch: iter_time=1.199e-04, forward_time=0.146, loss_ctc=68.722, loss_att=44.612, acc=0.725, loss=51.845, backward_time=1.026, grad_norm=133.861, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.082e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 20:27:25,140 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-14 20:27:42,919 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:27:46,370 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:27:46,370 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-14 20:27:46,376 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:31:59,928 (trainer:732) INFO: 50epoch:train:4101-4200batch: iter_time=1.266, forward_time=0.158, loss_ctc=69.892, loss_att=54.719, acc=0.724, loss=59.271, backward_time=1.041, grad_norm=120.069, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.081e-05, train_time=7.318
+[gpub001:0/64] 2023-07-14 20:34:16,845 (trainer:732) INFO: 50epoch:train:4201-4300batch: iter_time=1.347e-04, forward_time=0.148, loss_ctc=72.245, loss_att=50.876, acc=0.714, loss=57.287, backward_time=1.033, grad_norm=144.739, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.081e-05, train_time=2.740
+[gpub001:0/64] 2023-07-14 20:36:33,354 (trainer:732) INFO: 50epoch:train:4301-4400batch: iter_time=1.372e-04, forward_time=0.145, loss_ctc=64.193, loss_att=45.992, acc=0.740, loss=51.452, backward_time=1.028, grad_norm=136.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.080e-05, train_time=2.730
+[gpub001:0/64] 2023-07-14 20:38:49,363 (trainer:732) INFO: 50epoch:train:4401-4500batch: iter_time=1.302e-04, forward_time=0.145, loss_ctc=64.055, loss_att=49.713, acc=0.733, loss=54.015, backward_time=1.029, grad_norm=152.603, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.080e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:41:05,362 (trainer:732) INFO: 50epoch:train:4501-4600batch: iter_time=1.058e-04, forward_time=0.146, loss_ctc=71.594, loss_att=49.931, acc=0.723, loss=56.430, backward_time=1.030, grad_norm=141.739, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.079e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:43:21,381 (trainer:732) INFO: 50epoch:train:4601-4700batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=72.948, loss_att=55.695, acc=0.725, loss=60.871, backward_time=1.029, grad_norm=108.629, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.079e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:45:37,140 (trainer:732) INFO: 50epoch:train:4701-4800batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=65.934, loss_att=47.948, acc=0.724, loss=53.344, backward_time=1.028, grad_norm=152.190, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.078e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 20:47:52,965 (trainer:732) INFO: 50epoch:train:4801-4900batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.950, loss_att=45.382, acc=0.730, loss=52.453, backward_time=1.029, grad_norm=130.881, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.078e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 20:50:08,537 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-14 20:50:26,538 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:50:30,045 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:50:30,046 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-14 20:50:30,052 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:54:56,835 (trainer:732) INFO: 50epoch:train:4901-5000batch: iter_time=1.261, forward_time=0.166, loss_ctc=68.113, loss_att=51.907, acc=0.712, loss=56.769, backward_time=1.030, grad_norm=123.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=8.477
+[gpub001:0/64] 2023-07-14 20:57:14,888 (trainer:732) INFO: 50epoch:train:5001-5100batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=74.370, loss_att=53.834, acc=0.714, loss=59.995, backward_time=1.037, grad_norm=133.466, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 20:59:30,447 (trainer:732) INFO: 50epoch:train:5101-5200batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=63.418, loss_att=44.065, acc=0.743, loss=49.871, backward_time=1.025, grad_norm=118.916, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.076e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 21:01:46,392 (trainer:732) INFO: 50epoch:train:5201-5300batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=64.343, loss_att=50.622, acc=0.729, loss=54.738, backward_time=1.029, grad_norm=107.040, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.076e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 21:04:02,266 (trainer:732) INFO: 50epoch:train:5301-5400batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=67.130, loss_att=45.243, acc=0.738, loss=51.809, backward_time=1.027, grad_norm=126.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.075e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 21:06:18,342 (trainer:732) INFO: 50epoch:train:5401-5500batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=74.073, loss_att=55.688, acc=0.724, loss=61.203, backward_time=1.029, grad_norm=120.175, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.075e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 21:08:34,524 (trainer:732) INFO: 50epoch:train:5501-5600batch: iter_time=1.200e-04, forward_time=0.148, loss_ctc=64.861, loss_att=50.199, acc=0.726, loss=54.598, backward_time=1.029, grad_norm=114.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.074e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 21:10:57,350 (trainer:732) INFO: 50epoch:train:5601-5700batch: iter_time=1.141e-04, forward_time=0.146, loss_ctc=63.074, loss_att=45.439, acc=0.726, loss=50.730, backward_time=1.034, grad_norm=110.217, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.074e-05, train_time=2.856
+[gpub001:0/64] 2023-07-14 21:13:13,082 (trainer:732) INFO: 50epoch:train:5701-5800batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=69.031, loss_att=46.799, acc=0.723, loss=53.469, backward_time=1.027, grad_norm=143.702, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.073e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 21:14:11,324 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-14 21:14:29,273 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 21:14:32,688 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 21:14:32,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-14 21:14:32,749 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 21:19:03,310 (trainer:732) INFO: 50epoch:train:5801-5900batch: iter_time=2.002, forward_time=0.183, loss_ctc=73.655, loss_att=53.932, acc=0.722, loss=59.849, backward_time=1.055, grad_norm=115.715, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.073e-05, train_time=7.004
+[gpub001:0/64] 2023-07-14 21:21:32,431 (trainer:732) INFO: 50epoch:train:5901-6000batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=66.964, loss_att=50.032, acc=0.725, loss=55.112, backward_time=1.037, grad_norm=127.025, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.072e-05, train_time=2.983
+[gpub001:0/64] 2023-07-14 21:24:07,379 (trainer:732) INFO: 50epoch:train:6001-6100batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=66.762, loss_att=49.729, acc=0.735, loss=54.839, backward_time=1.080, grad_norm=115.938, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.071e-05, train_time=3.099
+[gpub001:0/64] 2023-07-14 21:26:28,272 (trainer:732) INFO: 50epoch:train:6101-6200batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=65.838, loss_att=47.261, acc=0.729, loss=52.834, backward_time=1.034, grad_norm=114.927, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.071e-05, train_time=2.818
+[gpub001:0/64] 2023-07-14 21:28:48,811 (trainer:732) INFO: 50epoch:train:6201-6300batch: iter_time=1.167e-04, forward_time=0.147, loss_ctc=68.647, loss_att=49.152, acc=0.730, loss=55.000, backward_time=1.037, grad_norm=128.845, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.070e-05, train_time=2.811
+[gpub001:0/64] 2023-07-14 21:30:05,936 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 21:31:12,672 (trainer:732) INFO: 50epoch:train:6301-6400batch: iter_time=1.053e-04, forward_time=0.147, loss_ctc=69.972, loss_att=51.775, acc=0.730, loss=57.234, backward_time=1.035, grad_norm=136.068, clip=100.000, loss_scale=2.484e+32, optim_step_time=0.182, optim0_lr0=5.070e-05, train_time=2.877
+[gpub001:0/64] 2023-07-14 21:33:31,183 (trainer:732) INFO: 50epoch:train:6401-6500batch: iter_time=1.154e-04, forward_time=0.146, loss_ctc=66.029, loss_att=48.270, acc=0.719, loss=53.598, backward_time=1.031, grad_norm=131.463, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.069e-05, train_time=2.770
+[gpub001:0/64] 2023-07-14 21:35:47,199 (trainer:732) INFO: 50epoch:train:6501-6600batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=68.099, loss_att=44.525, acc=0.732, loss=51.598, backward_time=1.027, grad_norm=116.086, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.069e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 21:37:20,083 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-14 21:37:38,383 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 21:37:41,825 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 21:37:41,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-14 21:37:41,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 21:41:34,057 (trainer:732) INFO: 50epoch:train:6601-6700batch: iter_time=1.243, forward_time=0.149, loss_ctc=75.228, loss_att=56.364, acc=0.705, loss=62.024, backward_time=1.043, grad_norm=131.714, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.068e-05, train_time=6.937
+[gpub001:0/64] 2023-07-14 21:43:51,597 (trainer:732) INFO: 50epoch:train:6701-6800batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=62.359, loss_att=46.440, acc=0.717, loss=51.216, backward_time=1.031, grad_norm=126.683, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.068e-05, train_time=2.751
+[gpub001:0/64] 2023-07-14 21:46:07,899 (trainer:732) INFO: 50epoch:train:6801-6900batch: iter_time=1.299e-04, forward_time=0.146, loss_ctc=65.735, loss_att=48.063, acc=0.725, loss=53.365, backward_time=1.029, grad_norm=134.386, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.067e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 21:48:23,594 (trainer:732) INFO: 50epoch:train:6901-7000batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=68.323, loss_att=49.027, acc=0.722, loss=54.815, backward_time=1.026, grad_norm=130.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.067e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 21:50:39,377 (trainer:732) INFO: 50epoch:train:7001-7100batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=70.496, loss_att=52.439, acc=0.716, loss=57.856, backward_time=1.029, grad_norm=129.853, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.066e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 21:52:55,578 (trainer:732) INFO: 50epoch:train:7101-7200batch: iter_time=1.273e-04, forward_time=0.147, loss_ctc=67.037, loss_att=51.153, acc=0.710, loss=55.918, backward_time=1.031, grad_norm=135.297, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.066e-05, train_time=2.724
+[gpub001:0/64] 2023-07-14 21:55:11,269 (trainer:732) INFO: 50epoch:train:7201-7300batch: iter_time=1.392e-04, forward_time=0.146, loss_ctc=64.625, loss_att=47.415, acc=0.716, loss=52.578, backward_time=1.028, grad_norm=138.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.065e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 21:57:27,032 (trainer:732) INFO: 50epoch:train:7301-7400batch: iter_time=1.153e-04, forward_time=0.146, loss_ctc=67.172, loss_att=45.670, acc=0.720, loss=52.120, backward_time=1.027, grad_norm=138.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.065e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 21:59:42,685 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-14 22:00:00,870 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 22:00:04,268 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 22:00:04,268 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-14 22:00:04,274 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 22:04:14,311 (trainer:732) INFO: 50epoch:train:7401-7500batch: iter_time=1.298, forward_time=0.174, loss_ctc=72.785, loss_att=54.934, acc=0.706, loss=60.289, backward_time=1.034, grad_norm=127.076, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.064e-05, train_time=8.145
+[gpub001:0/64] 2023-07-14 22:06:32,396 (trainer:732) INFO: 50epoch:train:7501-7600batch: iter_time=1.281e-04, forward_time=0.145, loss_ctc=75.267, loss_att=55.281, acc=0.706, loss=61.277, backward_time=1.033, grad_norm=127.032, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.064e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 22:08:49,298 (trainer:732) INFO: 50epoch:train:7601-7700batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=61.713, loss_att=43.430, acc=0.731, loss=48.915, backward_time=1.029, grad_norm=113.236, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.063e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 22:11:05,012 (trainer:732) INFO: 50epoch:train:7701-7800batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=64.435, loss_att=51.314, acc=0.720, loss=55.251, backward_time=1.025, grad_norm=137.487, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.063e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 22:13:20,529 (trainer:732) INFO: 50epoch:train:7801-7900batch: iter_time=1.071e-04, forward_time=0.145, loss_ctc=67.719, loss_att=45.921, acc=0.729, loss=52.460, backward_time=1.026, grad_norm=116.618, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.062e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 22:15:36,651 (trainer:732) INFO: 50epoch:train:7901-8000batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=74.668, loss_att=55.808, acc=0.718, loss=61.466, backward_time=1.030, grad_norm=138.995, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.062e-05, train_time=2.722
+[gpub001:0/64] 2023-07-14 22:17:52,426 (trainer:732) INFO: 50epoch:train:8001-8100batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=64.712, loss_att=49.154, acc=0.717, loss=53.822, backward_time=1.027, grad_norm=134.088, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.061e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 22:20:08,073 (trainer:732) INFO: 50epoch:train:8101-8200batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=62.415, loss_att=45.458, acc=0.725, loss=50.545, backward_time=1.026, grad_norm=104.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.061e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 22:22:24,409 (trainer:732) INFO: 50epoch:train:8201-8300batch: iter_time=1.217e-04, forward_time=0.144, loss_ctc=69.803, loss_att=47.319, acc=0.716, loss=54.064, backward_time=1.024, grad_norm=120.199, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.060e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 22:23:27,929 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-14 22:23:46,234 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 22:23:49,965 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 22:23:49,965 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-14 22:23:49,971 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 22:29:23,107 (trainer:732) INFO: 50epoch:train:8301-8400batch: iter_time=1.937, forward_time=0.171, loss_ctc=73.351, loss_att=54.696, acc=0.704, loss=60.292, backward_time=1.042, grad_norm=126.226, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.060e-05, train_time=8.373
+[gpub001:0/64] 2023-07-14 22:31:39,375 (trainer:732) INFO: 50epoch:train:8401-8500batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=65.360, loss_att=47.307, acc=0.724, loss=52.723, backward_time=1.028, grad_norm=106.196, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.059e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 22:33:56,198 (trainer:732) INFO: 50epoch:train:8501-8600batch: iter_time=1.121e-04, forward_time=0.145, loss_ctc=62.356, loss_att=47.082, acc=0.731, loss=51.664, backward_time=1.028, grad_norm=116.896, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.058e-05, train_time=2.736
+[gpub001:0/64] 2023-07-14 22:36:12,773 (trainer:732) INFO: 50epoch:train:8601-8700batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=69.928, loss_att=48.566, acc=0.721, loss=54.975, backward_time=1.028, grad_norm=123.106, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.058e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 22:38:28,734 (trainer:732) INFO: 50epoch:train:8701-8800batch: iter_time=1.093e-04, forward_time=0.145, loss_ctc=73.339, loss_att=55.181, acc=0.711, loss=60.628, backward_time=1.027, grad_norm=118.978, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.057e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 22:40:44,531 (trainer:732) INFO: 50epoch:train:8801-8900batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=63.033, loss_att=48.849, acc=0.707, loss=53.104, backward_time=1.028, grad_norm=132.965, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.057e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 22:42:59,903 (trainer:732) INFO: 50epoch:train:8901-9000batch: iter_time=1.334e-04, forward_time=0.145, loss_ctc=64.413, loss_att=46.307, acc=0.729, loss=51.739, backward_time=1.026, grad_norm=96.639, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.056e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 22:45:18,107 (trainer:732) INFO: 50epoch:train:9001-9100batch: iter_time=1.165e-04, forward_time=0.147, loss_ctc=69.114, loss_att=45.854, acc=0.719, loss=52.832, backward_time=1.032, grad_norm=140.806, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.056e-05, train_time=2.764
+[gpub001:0/64] 2023-07-14 22:46:49,602 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-14 22:47:07,559 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 22:47:10,970 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 22:47:10,970 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-14 22:47:11,050 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 22:52:32,878 (trainer:732) INFO: 50epoch:train:9101-9200batch: iter_time=2.016, forward_time=0.145, loss_ctc=79.028, loss_att=57.548, acc=0.699, loss=63.992, backward_time=1.040, grad_norm=136.426, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.055e-05, train_time=8.695
+[gpub001:0/64] 2023-07-14 22:54:49,816 (trainer:732) INFO: 50epoch:train:9201-9300batch: iter_time=1.038e-04, forward_time=0.145, loss_ctc=62.727, loss_att=47.586, acc=0.737, loss=52.129, backward_time=1.032, grad_norm=123.258, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.055e-05, train_time=2.739
+[gpub001:0/64] 2023-07-14 22:57:06,559 (trainer:732) INFO: 50epoch:train:9301-9400batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=64.256, loss_att=48.937, acc=0.734, loss=53.533, backward_time=1.029, grad_norm=116.732, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.054e-05, train_time=2.735
+[gpub001:0/64] 2023-07-14 22:59:22,627 (trainer:732) INFO: 50epoch:train:9401-9500batch: iter_time=1.020e-04, forward_time=0.144, loss_ctc=68.645, loss_att=49.152, acc=0.732, loss=55.000, backward_time=1.026, grad_norm=126.326, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.054e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 23:01:39,743 (trainer:732) INFO: 50epoch:train:9501-9600batch: iter_time=1.028e-04, forward_time=0.146, loss_ctc=71.460, loss_att=50.831, acc=0.729, loss=57.020, backward_time=1.030, grad_norm=125.820, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.053e-05, train_time=2.742
+[gpub001:0/64] 2023-07-14 23:03:56,039 (trainer:732) INFO: 50epoch:train:9601-9700batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=65.840, loss_att=52.027, acc=0.718, loss=56.171, backward_time=1.029, grad_norm=137.286, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.053e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 23:06:11,688 (trainer:732) INFO: 50epoch:train:9701-9800batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=65.045, loss_att=48.007, acc=0.726, loss=53.118, backward_time=1.026, grad_norm=116.113, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.052e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 23:08:27,117 (trainer:732) INFO: 50epoch:train:9801-9900batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=69.175, loss_att=45.588, acc=0.730, loss=52.664, backward_time=1.025, grad_norm=139.307, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.052e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 23:10:42,703 (trainer:732) INFO: 50epoch:train:9901-10000batch: iter_time=1.068e-04, forward_time=0.145, loss_ctc=72.841, loss_att=53.971, acc=0.715, loss=59.632, backward_time=1.025, grad_norm=123.765, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.051e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 23:24:09,245 (trainer:338) INFO: 50epoch results: [train] iter_time=0.179, forward_time=0.148, loss_ctc=68.583, loss_att=49.901, acc=0.720, loss=55.506, backward_time=1.032, grad_norm=127.995, clip=100.000, loss_scale=3.121e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=3.329, time=4 hours, 37 minutes and 40.49 seconds, total_count=470000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.429, cer_ctc=0.252, loss_att=37.607, acc=0.676, cer=0.417, wer=0.998, loss=39.054, time=7 minutes and 17.28 seconds, total_count=48070, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 52.99 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-14 23:24:24,757 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-14 23:24:24,796 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till50epoch.pth
+[gpub001:0/64] 2023-07-14 23:25:13,113 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till50epoch.pth
+[gpub001:0/64] 2023-07-14 23:25:53,416 (trainer:272) INFO: 51/60epoch started. Estimated time to finish: 2 days, 1 hour and 33 minutes
+[gpub001:0/64] 2023-07-14 23:25:55,441 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-14 23:26:15,006 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 23:26:18,855 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 23:26:18,856 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 23:26:18,921 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 23:36:35,831 (trainer:732) INFO: 51epoch:train:1-100batch: iter_time=4.981, forward_time=0.181, loss_ctc=71.923, loss_att=55.595, acc=0.703, loss=60.494, backward_time=1.041, grad_norm=115.944, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.051e-05, train_time=12.824
+[gpub001:0/64] 2023-07-14 23:38:52,739 (trainer:732) INFO: 51epoch:train:101-200batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=79.235, loss_att=65.934, acc=0.698, loss=69.924, backward_time=1.031, grad_norm=121.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.050e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 23:41:08,953 (trainer:732) INFO: 51epoch:train:201-300batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=69.961, loss_att=49.731, acc=0.710, loss=55.800, backward_time=1.029, grad_norm=132.357, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.050e-05, train_time=2.724
+[gpub001:0/64] 2023-07-14 23:43:26,233 (trainer:732) INFO: 51epoch:train:301-400batch: iter_time=1.131e-04, forward_time=0.144, loss_ctc=64.152, loss_att=48.408, acc=0.691, loss=53.132, backward_time=1.026, grad_norm=128.225, clip=100.000, loss_scale=2.369e+32, optim_step_time=0.182, optim0_lr0=5.049e-05, train_time=2.745
+[gpub001:0/64] 2023-07-14 23:45:41,876 (trainer:732) INFO: 51epoch:train:401-500batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=72.180, loss_att=50.535, acc=0.705, loss=57.029, backward_time=1.028, grad_norm=153.483, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.049e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 23:48:00,509 (trainer:732) INFO: 51epoch:train:501-600batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=81.916, loss_att=67.146, acc=0.698, loss=71.577, backward_time=1.036, grad_norm=135.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.048e-05, train_time=2.772
+[gpub001:0/64] 2023-07-14 23:50:17,111 (trainer:732) INFO: 51epoch:train:601-700batch: iter_time=1.289e-04, forward_time=0.145, loss_ctc=62.275, loss_att=44.773, acc=0.726, loss=50.023, backward_time=1.029, grad_norm=104.898, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.048e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 23:52:33,852 (trainer:732) INFO: 51epoch:train:701-800batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=74.632, loss_att=56.174, acc=0.688, loss=61.711, backward_time=1.027, grad_norm=120.912, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.047e-05, train_time=2.735
+[gpub001:0/64] 2023-07-14 23:53:29,332 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-14 23:53:47,988 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 23:53:51,397 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 23:53:51,398 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-14 23:53:51,404 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 00:02:44,151 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 00:03:11,538 (trainer:732) INFO: 51epoch:train:801-900batch: iter_time=4.917, forward_time=0.176, loss_ctc=75.023, loss_att=61.940, acc=0.702, loss=65.865, backward_time=1.043, grad_norm=135.587, clip=100.000, loss_scale=2.914e+32, optim_step_time=0.183, optim0_lr0=5.047e-05, train_time=12.754
+[gpub001:0/64] 2023-07-15 00:05:28,614 (trainer:732) INFO: 51epoch:train:901-1000batch: iter_time=1.399e-04, forward_time=0.146, loss_ctc=77.022, loss_att=61.388, acc=0.701, loss=66.078, backward_time=1.031, grad_norm=116.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.046e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 00:07:45,430 (trainer:732) INFO: 51epoch:train:1001-1100batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=72.448, loss_att=57.061, acc=0.706, loss=61.677, backward_time=1.029, grad_norm=118.194, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.046e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 00:10:00,792 (trainer:732) INFO: 51epoch:train:1101-1200batch: iter_time=1.132e-04, forward_time=0.143, loss_ctc=63.641, loss_att=47.528, acc=0.704, loss=52.362, backward_time=1.025, grad_norm=122.929, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.045e-05, train_time=2.707
+[gpub001:0/64] 2023-07-15 00:12:16,409 (trainer:732) INFO: 51epoch:train:1201-1300batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=64.509, loss_att=47.066, acc=0.702, loss=52.299, backward_time=1.025, grad_norm=124.287, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.045e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 00:14:31,911 (trainer:732) INFO: 51epoch:train:1301-1400batch: iter_time=1.155e-04, forward_time=0.143, loss_ctc=81.052, loss_att=63.313, acc=0.697, loss=68.635, backward_time=1.025, grad_norm=161.228, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.044e-05, train_time=2.710
+[gpub001:0/64] 2023-07-15 00:16:47,763 (trainer:732) INFO: 51epoch:train:1401-1500batch: iter_time=1.052e-04, forward_time=0.144, loss_ctc=72.270, loss_att=53.340, acc=0.717, loss=59.019, backward_time=1.028, grad_norm=137.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.044e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 00:19:03,111 (trainer:732) INFO: 51epoch:train:1501-1600batch: iter_time=1.271e-04, forward_time=0.144, loss_ctc=70.162, loss_att=50.618, acc=0.706, loss=56.481, backward_time=1.025, grad_norm=170.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.043e-05, train_time=2.707
+[gpub001:0/64] 2023-07-15 00:20:39,237 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 00:20:57,151 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 00:21:00,575 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 00:21:00,575 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 00:21:00,582 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 00:24:56,757 (trainer:732) INFO: 51epoch:train:1601-1700batch: iter_time=1.325, forward_time=0.146, loss_ctc=74.706, loss_att=64.628, acc=0.698, loss=67.651, backward_time=1.039, grad_norm=142.799, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.043e-05, train_time=7.073
+[gpub001:0/64] 2023-07-15 00:27:14,907 (trainer:732) INFO: 51epoch:train:1701-1800batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=75.949, loss_att=63.359, acc=0.712, loss=67.136, backward_time=1.038, grad_norm=127.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.042e-05, train_time=2.763
+[gpub001:0/64] 2023-07-15 00:29:30,975 (trainer:732) INFO: 51epoch:train:1801-1900batch: iter_time=1.016e-04, forward_time=0.146, loss_ctc=69.913, loss_att=50.332, acc=0.719, loss=56.206, backward_time=1.031, grad_norm=110.939, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.041e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 00:31:46,677 (trainer:732) INFO: 51epoch:train:1901-2000batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.394, loss_att=48.413, acc=0.709, loss=53.208, backward_time=1.028, grad_norm=121.998, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.041e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 00:34:02,438 (trainer:732) INFO: 51epoch:train:2001-2100batch: iter_time=1.079e-04, forward_time=0.145, loss_ctc=66.797, loss_att=48.245, acc=0.712, loss=53.810, backward_time=1.029, grad_norm=124.842, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.040e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 00:36:19,042 (trainer:732) INFO: 51epoch:train:2101-2200batch: iter_time=1.069e-04, forward_time=0.146, loss_ctc=83.907, loss_att=64.979, acc=0.707, loss=70.657, backward_time=1.034, grad_norm=153.393, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.040e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 00:38:36,795 (trainer:732) INFO: 51epoch:train:2201-2300batch: iter_time=1.176e-04, forward_time=0.146, loss_ctc=64.525, loss_att=48.786, acc=0.723, loss=53.508, backward_time=1.030, grad_norm=103.031, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.039e-05, train_time=2.755
+[gpub001:0/64] 2023-07-15 00:40:52,543 (trainer:732) INFO: 51epoch:train:2301-2400batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=72.083, loss_att=51.081, acc=0.715, loss=57.381, backward_time=1.026, grad_norm=151.523, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.039e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 00:43:19,481 (trainer:732) INFO: 51epoch:train:2401-2500batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=70.723, loss_att=56.251, acc=0.711, loss=60.593, backward_time=1.041, grad_norm=144.872, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.038e-05, train_time=2.939
+[gpub001:0/64] 2023-07-15 00:43:21,090 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 00:43:39,291 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 00:43:42,732 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 00:43:42,733 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 00:43:42,739 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 00:49:10,741 (trainer:732) INFO: 51epoch:train:2501-2600batch: iter_time=1.293, forward_time=0.188, loss_ctc=72.326, loss_att=55.154, acc=0.706, loss=60.305, backward_time=1.054, grad_norm=150.639, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.038e-05, train_time=7.025
+[gpub001:0/64] 2023-07-15 00:51:36,074 (trainer:732) INFO: 51epoch:train:2601-2700batch: iter_time=1.215e-04, forward_time=0.149, loss_ctc=78.155, loss_att=65.045, acc=0.701, loss=68.978, backward_time=1.038, grad_norm=133.517, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.037e-05, train_time=2.907
+[gpub001:0/64] 2023-07-15 00:54:08,484 (trainer:732) INFO: 51epoch:train:2701-2800batch: iter_time=1.115e-04, forward_time=0.146, loss_ctc=68.157, loss_att=49.050, acc=0.717, loss=54.782, backward_time=1.051, grad_norm=120.581, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.037e-05, train_time=3.048
+[gpub001:0/64] 2023-07-15 00:56:50,168 (trainer:732) INFO: 51epoch:train:2801-2900batch: iter_time=1.083e-04, forward_time=0.145, loss_ctc=61.833, loss_att=46.996, acc=0.700, loss=51.447, backward_time=1.048, grad_norm=110.089, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.036e-05, train_time=3.233
+[gpub001:0/64] 2023-07-15 00:59:24,641 (trainer:732) INFO: 51epoch:train:2901-3000batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=74.094, loss_att=49.328, acc=0.708, loss=56.758, backward_time=1.046, grad_norm=137.040, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.036e-05, train_time=3.089
+[gpub001:0/64] 2023-07-15 01:01:53,489 (trainer:732) INFO: 51epoch:train:3001-3100batch: iter_time=1.135e-04, forward_time=0.145, loss_ctc=81.243, loss_att=67.309, acc=0.698, loss=71.489, backward_time=1.042, grad_norm=131.285, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.035e-05, train_time=2.977
+[gpub001:0/64] 2023-07-15 01:04:27,494 (trainer:732) INFO: 51epoch:train:3101-3200batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=61.668, loss_att=44.652, acc=0.728, loss=49.757, backward_time=1.047, grad_norm=103.749, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.035e-05, train_time=3.080
+[gpub001:0/64] 2023-07-15 01:06:55,343 (trainer:732) INFO: 51epoch:train:3201-3300batch: iter_time=1.166e-04, forward_time=0.145, loss_ctc=72.274, loss_att=53.996, acc=0.696, loss=59.479, backward_time=1.037, grad_norm=125.051, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.034e-05, train_time=2.957
+[gpub001:0/64] 2023-07-15 01:08:01,140 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 01:08:19,465 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 01:08:22,895 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 01:08:22,895 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 01:08:22,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 01:13:05,704 (trainer:732) INFO: 51epoch:train:3301-3400batch: iter_time=2.009, forward_time=0.148, loss_ctc=75.223, loss_att=63.691, acc=0.710, loss=67.150, backward_time=1.051, grad_norm=142.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.034e-05, train_time=7.407
+[gpub001:0/64] 2023-07-15 01:15:22,767 (trainer:732) INFO: 51epoch:train:3401-3500batch: iter_time=1.216e-04, forward_time=0.147, loss_ctc=74.723, loss_att=61.360, acc=0.699, loss=65.369, backward_time=1.033, grad_norm=144.345, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.033e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 01:17:38,745 (trainer:732) INFO: 51epoch:train:3501-3600batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=65.675, loss_att=49.439, acc=0.718, loss=54.310, backward_time=1.028, grad_norm=123.402, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.033e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 01:19:54,187 (trainer:732) INFO: 51epoch:train:3601-3700batch: iter_time=1.316e-04, forward_time=0.145, loss_ctc=60.673, loss_att=46.051, acc=0.693, loss=50.437, backward_time=1.026, grad_norm=100.622, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.032e-05, train_time=2.709
+[gpub001:0/64] 2023-07-15 01:22:10,043 (trainer:732) INFO: 51epoch:train:3701-3800batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=77.507, loss_att=54.217, acc=0.713, loss=61.204, backward_time=1.029, grad_norm=155.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.032e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 01:24:26,099 (trainer:732) INFO: 51epoch:train:3801-3900batch: iter_time=1.260e-04, forward_time=0.147, loss_ctc=77.820, loss_att=62.731, acc=0.702, loss=67.257, backward_time=1.031, grad_norm=134.212, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.031e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 01:26:42,048 (trainer:732) INFO: 51epoch:train:3901-4000batch: iter_time=1.229e-04, forward_time=0.147, loss_ctc=64.291, loss_att=46.914, acc=0.712, loss=52.127, backward_time=1.028, grad_norm=125.741, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.031e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 01:28:57,962 (trainer:732) INFO: 51epoch:train:4001-4100batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.662, loss_att=55.242, acc=0.705, loss=59.868, backward_time=1.027, grad_norm=125.153, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.030e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 01:30:40,640 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 01:30:58,549 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 01:31:02,016 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 01:31:02,017 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 01:31:02,023 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 01:35:33,364 (trainer:732) INFO: 51epoch:train:4101-4200batch: iter_time=1.388, forward_time=0.180, loss_ctc=68.988, loss_att=53.972, acc=0.717, loss=58.477, backward_time=1.042, grad_norm=120.989, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.030e-05, train_time=7.908
+[gpub001:0/64] 2023-07-15 01:37:51,985 (trainer:732) INFO: 51epoch:train:4201-4300batch: iter_time=1.179e-04, forward_time=0.147, loss_ctc=74.833, loss_att=60.854, acc=0.715, loss=65.048, backward_time=1.037, grad_norm=133.761, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.029e-05, train_time=2.772
+[gpub001:0/64] 2023-07-15 01:40:08,097 (trainer:732) INFO: 51epoch:train:4301-4400batch: iter_time=1.075e-04, forward_time=0.146, loss_ctc=69.438, loss_att=52.734, acc=0.721, loss=57.745, backward_time=1.029, grad_norm=123.930, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.029e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 01:42:25,511 (trainer:732) INFO: 51epoch:train:4401-4500batch: iter_time=1.121e-04, forward_time=0.155, loss_ctc=63.839, loss_att=48.472, acc=0.713, loss=53.082, backward_time=1.032, grad_norm=131.811, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.028e-05, train_time=2.748
+[gpub001:0/64] 2023-07-15 01:44:40,951 (trainer:732) INFO: 51epoch:train:4501-4600batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=66.389, loss_att=47.906, acc=0.715, loss=53.451, backward_time=1.026, grad_norm=153.232, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.028e-05, train_time=2.709
+[gpub001:0/64] 2023-07-15 01:46:57,089 (trainer:732) INFO: 51epoch:train:4601-4700batch: iter_time=1.045e-04, forward_time=0.145, loss_ctc=83.957, loss_att=64.618, acc=0.707, loss=70.420, backward_time=1.030, grad_norm=166.949, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.027e-05, train_time=2.723
+[gpub001:0/64] 2023-07-15 01:49:12,741 (trainer:732) INFO: 51epoch:train:4701-4800batch: iter_time=9.881e-05, forward_time=0.145, loss_ctc=64.633, loss_att=48.052, acc=0.725, loss=53.027, backward_time=1.029, grad_norm=122.350, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.027e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 01:51:28,305 (trainer:732) INFO: 51epoch:train:4801-4900batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=69.030, loss_att=50.159, acc=0.722, loss=55.820, backward_time=1.027, grad_norm=117.971, clip=100.000, loss_scale=1.947e+32, optim_step_time=0.182, optim0_lr0=5.026e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 01:53:45,279 (trainer:732) INFO: 51epoch:train:4901-5000batch: iter_time=1.268e-04, forward_time=0.153, loss_ctc=70.500, loss_att=57.120, acc=0.709, loss=61.134, backward_time=1.030, grad_norm=119.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.026e-05, train_time=2.739
+[gpub001:0/64] 2023-07-15 01:53:49,960 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-15 01:54:07,620 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 01:54:11,021 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 01:54:11,021 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 01:54:11,028 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 02:00:36,494 (trainer:732) INFO: 51epoch:train:5001-5100batch: iter_time=1.321, forward_time=0.197, loss_ctc=71.796, loss_att=54.624, acc=0.720, loss=59.775, backward_time=1.042, grad_norm=142.285, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.025e-05, train_time=8.225
+[gpub001:0/64] 2023-07-15 02:02:53,486 (trainer:732) INFO: 51epoch:train:5101-5200batch: iter_time=1.192e-04, forward_time=0.148, loss_ctc=77.425, loss_att=64.117, acc=0.711, loss=68.109, backward_time=1.031, grad_norm=121.077, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.025e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 02:05:14,153 (trainer:732) INFO: 51epoch:train:5201-5300batch: iter_time=2.312e-04, forward_time=0.185, loss_ctc=68.549, loss_att=49.844, acc=0.725, loss=55.455, backward_time=1.034, grad_norm=126.463, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.024e-05, train_time=2.813
+[gpub001:0/64] 2023-07-15 02:07:30,780 (trainer:732) INFO: 51epoch:train:5301-5400batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=61.261, loss_att=47.446, acc=0.706, loss=51.590, backward_time=1.030, grad_norm=122.414, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.024e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 02:09:46,510 (trainer:732) INFO: 51epoch:train:5401-5500batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=71.938, loss_att=49.005, acc=0.717, loss=55.885, backward_time=1.027, grad_norm=145.363, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.023e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 02:12:02,889 (trainer:732) INFO: 51epoch:train:5501-5600batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=80.684, loss_att=65.569, acc=0.713, loss=70.103, backward_time=1.031, grad_norm=154.669, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.023e-05, train_time=2.727
+[gpub001:0/64] 2023-07-15 02:14:18,639 (trainer:732) INFO: 51epoch:train:5601-5700batch: iter_time=1.232e-04, forward_time=0.145, loss_ctc=60.971, loss_att=43.444, acc=0.734, loss=48.702, backward_time=1.028, grad_norm=110.491, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.022e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 02:16:42,195 (trainer:732) INFO: 51epoch:train:5701-5800batch: iter_time=1.226e-04, forward_time=0.206, loss_ctc=71.178, loss_att=52.973, acc=0.713, loss=58.434, backward_time=1.033, grad_norm=126.966, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.022e-05, train_time=2.870
+[gpub001:0/64] 2023-07-15 02:17:48,783 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 02:18:06,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 02:18:10,346 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 02:18:10,346 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 02:18:10,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 02:24:23,285 (trainer:732) INFO: 51epoch:train:5801-5900batch: iter_time=3.167, forward_time=0.193, loss_ctc=73.954, loss_att=58.719, acc=0.720, loss=63.289, backward_time=1.046, grad_norm=120.270, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.021e-05, train_time=9.221
+[gpub001:0/64] 2023-07-15 02:26:42,799 (trainer:732) INFO: 51epoch:train:5901-6000batch: iter_time=1.365e-04, forward_time=0.148, loss_ctc=77.878, loss_att=59.660, acc=0.724, loss=65.125, backward_time=1.031, grad_norm=137.273, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.021e-05, train_time=2.791
+[gpub001:0/64] 2023-07-15 02:29:01,054 (trainer:732) INFO: 51epoch:train:6001-6100batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=71.477, loss_att=57.056, acc=0.717, loss=61.382, backward_time=1.032, grad_norm=132.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.020e-05, train_time=2.765
+[gpub001:0/64] 2023-07-15 02:42:50,762 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 02:42:54,186 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 02:42:54,186 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 02:42:54,192 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 02:47:36,979 (trainer:732) INFO: 51epoch:train:6601-6700batch: iter_time=1.585, forward_time=0.162, loss_ctc=70.032, loss_att=58.956, acc=0.708, loss=62.279, backward_time=1.037, grad_norm=139.517, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.017e-05, train_time=8.260 +[gpub001:0/64] 2023-07-15 02:49:54,229 (trainer:732) INFO: 51epoch:train:6701-6800batch: iter_time=1.289e-04, forward_time=0.147, loss_ctc=75.545, loss_att=58.495, acc=0.711, loss=63.610, backward_time=1.033, grad_norm=141.681, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.017e-05, train_time=2.745 +[gpub001:0/64] 2023-07-15 02:52:10,761 (trainer:732) INFO: 51epoch:train:6801-6900batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=74.367, loss_att=61.655, acc=0.708, loss=65.468, backward_time=1.029, grad_norm=128.279, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.016e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 02:54:26,812 (trainer:732) INFO: 51epoch:train:6901-7000batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=65.571, loss_att=49.989, acc=0.708, loss=54.664, backward_time=1.028, grad_norm=112.823, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.016e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 02:56:42,153 (trainer:732) INFO: 51epoch:train:7001-7100batch: iter_time=1.037e-04, forward_time=0.144, loss_ctc=56.492, loss_att=43.028, acc=0.701, loss=47.067, backward_time=1.025, grad_norm=116.572, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.015e-05, train_time=2.707 +[gpub001:0/64] 2023-07-15 02:58:58,134 (trainer:732) INFO: 51epoch:train:7101-7200batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=84.088, loss_att=59.885, acc=0.716, loss=67.146, backward_time=1.030, grad_norm=159.957, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.015e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 03:01:13,992 (trainer:732) INFO: 51epoch:train:7201-7300batch: iter_time=1.333e-04, forward_time=0.145, loss_ctc=72.488, loss_att=55.906, acc=0.708, loss=60.881, backward_time=1.029, grad_norm=126.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.014e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 03:03:29,592 (trainer:732) INFO: 51epoch:train:7301-7400batch: iter_time=1.097e-04, forward_time=0.144, loss_ctc=67.910, loss_att=51.640, acc=0.710, loss=56.521, backward_time=1.028, grad_norm=112.939, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.014e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:05:45,812 (trainer:732) INFO: 51epoch:train:7401-7500batch: iter_time=9.587e-05, forward_time=0.146, loss_ctc=69.895, loss_att=58.438, acc=0.703, loss=61.875, backward_time=1.030, grad_norm=112.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.013e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 03:06:00,836 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 03:06:19,136 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:06:22,592 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:06:22,593 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 03:06:22,599 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:12:16,439 (trainer:732) INFO: 51epoch:train:7501-7600batch: iter_time=2.393, forward_time=0.145, loss_ctc=71.969, loss_att=54.560, acc=0.721, loss=59.782, backward_time=1.046, grad_norm=133.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.013e-05, train_time=7.812 +[gpub001:0/64] 2023-07-15 03:14:33,471 (trainer:732) INFO: 51epoch:train:7601-7700batch: iter_time=1.093e-04, forward_time=0.147, loss_ctc=76.969, loss_att=62.660, acc=0.719, loss=66.953, backward_time=1.033, grad_norm=136.541, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.012e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 03:16:50,003 (trainer:732) INFO: 51epoch:train:7701-7800batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=67.626, loss_att=48.666, acc=0.727, loss=54.354, backward_time=1.031, grad_norm=124.562, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.012e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 03:19:06,142 (trainer:732) INFO: 51epoch:train:7801-7900batch: iter_time=1.138e-04, forward_time=0.146, loss_ctc=61.458, loss_att=47.781, acc=0.708, loss=51.884, backward_time=1.029, grad_norm=132.908, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.011e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 03:21:21,748 (trainer:732) INFO: 51epoch:train:7901-8000batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=73.377, loss_att=49.099, acc=0.720, loss=56.382, backward_time=1.026, grad_norm=145.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.011e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:23:41,893 (trainer:732) INFO: 51epoch:train:8001-8100batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=80.253, loss_att=65.949, acc=0.710, loss=70.240, backward_time=1.043, grad_norm=118.794, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.010e-05, train_time=2.803 +[gpub001:0/64] 2023-07-15 03:25:57,929 (trainer:732) INFO: 51epoch:train:8101-8200batch: iter_time=1.161e-04, forward_time=0.146, loss_ctc=59.833, 
loss_att=43.945, acc=0.737, loss=48.712, backward_time=1.028, grad_norm=97.269, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.010e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 03:28:22,509 (trainer:732) INFO: 51epoch:train:8201-8300batch: iter_time=4.203e-04, forward_time=0.206, loss_ctc=70.476, loss_att=52.382, acc=0.712, loss=57.810, backward_time=1.036, grad_norm=124.276, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.009e-05, train_time=2.891 +[gpub001:0/64] 2023-07-15 03:29:12,660 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 03:29:30,661 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:29:34,357 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:29:34,357 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 03:29:34,364 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:34:24,246 (trainer:732) INFO: 51epoch:train:8301-8400batch: iter_time=1.383, forward_time=0.183, loss_ctc=74.757, loss_att=62.043, acc=0.719, loss=65.857, backward_time=1.043, grad_norm=136.488, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.009e-05, train_time=7.234 +[gpub001:0/64] 2023-07-15 03:36:41,508 (trainer:732) INFO: 51epoch:train:8401-8500batch: iter_time=9.487e-05, forward_time=0.147, loss_ctc=73.152, loss_att=60.272, acc=0.703, loss=64.136, backward_time=1.032, grad_norm=130.904, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.008e-05, train_time=2.745 +[gpub001:0/64] 2023-07-15 03:38:58,154 (trainer:732) INFO: 51epoch:train:8501-8600batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=66.048, loss_att=49.450, acc=0.717, loss=54.429, backward_time=1.029, grad_norm=115.954, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.008e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 03:41:13,765 (trainer:732) INFO: 51epoch:train:8601-8700batch: iter_time=9.860e-05, forward_time=0.143, loss_ctc=60.580, loss_att=45.944, acc=0.699, loss=50.334, backward_time=1.024, grad_norm=132.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.007e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:43:30,063 (trainer:732) INFO: 51epoch:train:8701-8800batch: iter_time=1.299e-04, forward_time=0.147, loss_ctc=75.928, loss_att=53.765, acc=0.716, loss=60.414, backward_time=1.030, grad_norm=155.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.006e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 03:45:21,567 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
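# --- Aside: "The grad norm is nan. Skipping updating the model." -------------
# Under mixed precision the trainer skips the optimizer step when gradients
# are non-finite and lets the dynamic loss scale back off; that is why
# loss_scale later in this log halves from 3.245e+32 to 1.623e+32 after nan
# events, having doubled earlier (3.245e+32 -> 6.490e+32). Intermediate values
# such as 3.829e+32 are averages over a 100-batch reporting window. A minimal
# sketch of such a policy, assuming GradScaler-style constants (backoff 0.5,
# growth 2.0); the class below is illustrative, not ESPnet code:

import math

class DynamicLossScale:
    def __init__(self, scale: float, growth_interval: int = 2000) -> None:
        self.scale = scale
        self.growth_interval = growth_interval
        self._good_steps = 0

    def step_allowed(self, grad_norm: float) -> bool:
        """Return True if the optimizer step should be applied."""
        if not math.isfinite(grad_norm):   # overflow: skip update, halve scale
            self.scale *= 0.5
            self._good_steps = 0
            return False
        self._good_steps += 1
        if self._good_steps >= self.growth_interval:  # clean run: double scale
            self.scale *= 2.0
            self._good_steps = 0
        return True
# ------------------------------------------------------------------------------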
+[gpub001:0/64] 2023-07-15 03:45:45,981 (trainer:732) INFO: 51epoch:train:8801-8900batch: iter_time=1.266e-04, forward_time=0.147, loss_ctc=77.868, loss_att=61.551, acc=0.707, loss=66.446, backward_time=1.029, grad_norm=134.524, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.006e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 03:48:03,191 (trainer:732) INFO: 51epoch:train:8901-9000batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=64.524, loss_att=46.573, acc=0.715, loss=51.958, backward_time=1.029, grad_norm=138.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.006e-05, train_time=2.744 +[gpub001:0/64] 2023-07-15 03:50:20,560 (trainer:732) INFO: 51epoch:train:9001-9100batch: iter_time=1.038e-04, forward_time=0.146, loss_ctc=70.040, loss_att=54.802, acc=0.706, loss=59.373, backward_time=1.032, grad_norm=151.464, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.005e-05, train_time=2.747 +[gpub001:0/64] 2023-07-15 03:51:53,225 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub001:0/64] 2023-07-15 03:52:11,289 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:52:14,696 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:52:14,696 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub001:0/64] 2023-07-15 03:52:14,703 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:57:27,451 (trainer:732) INFO: 51epoch:train:9101-9200batch: iter_time=1.306, forward_time=0.144, loss_ctc=65.711, loss_att=50.921, acc=0.714, loss=55.358, backward_time=1.041, grad_norm=118.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.005e-05, train_time=8.538 +[gpub001:0/64] 2023-07-15 03:59:49,919 (trainer:732) INFO: 51epoch:train:9201-9300batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=75.976, loss_att=57.729, acc=0.724, loss=63.203, backward_time=1.042, grad_norm=148.716, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.004e-05, train_time=2.849 +[gpub001:0/64] 2023-07-15 04:02:07,659 (trainer:732) INFO: 51epoch:train:9301-9400batch: iter_time=1.137e-04, forward_time=0.148, loss_ctc=74.558, loss_att=59.518, acc=0.719, loss=64.030, backward_time=1.034, grad_norm=124.685, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.003e-05, train_time=2.755 +[gpub001:0/64] 2023-07-15 04:04:27,052 (trainer:732) INFO: 51epoch:train:9401-9500batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=64.717, loss_att=49.875, acc=0.718, loss=54.328, backward_time=1.038, grad_norm=132.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.003e-05, train_time=2.788 +[gpub001:0/64] 2023-07-15 04:06:45,446 (trainer:732) INFO: 51epoch:train:9501-9600batch: iter_time=9.616e-05, forward_time=0.145, loss_ctc=55.849, loss_att=42.233, acc=0.712, loss=46.317, 
backward_time=1.030, grad_norm=119.204, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.002e-05, train_time=2.768 +[gpub001:0/64] 2023-07-15 04:09:01,853 (trainer:732) INFO: 51epoch:train:9601-9700batch: iter_time=1.058e-04, forward_time=0.147, loss_ctc=82.299, loss_att=58.433, acc=0.727, loss=65.593, backward_time=1.032, grad_norm=158.815, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.002e-05, train_time=2.728 +[gpub001:0/64] 2023-07-15 04:11:21,471 (trainer:732) INFO: 51epoch:train:9701-9800batch: iter_time=1.045e-04, forward_time=0.146, loss_ctc=73.290, loss_att=56.427, acc=0.712, loss=61.486, backward_time=1.036, grad_norm=130.108, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.001e-05, train_time=2.792 +[gpub001:0/64] 2023-07-15 04:13:37,214 (trainer:732) INFO: 51epoch:train:9801-9900batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=67.601, loss_att=50.730, acc=0.720, loss=55.791, backward_time=1.028, grad_norm=117.358, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.001e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 04:15:53,312 (trainer:732) INFO: 51epoch:train:9901-10000batch: iter_time=9.479e-05, forward_time=0.147, loss_ctc=68.884, loss_att=56.769, acc=0.722, loss=60.403, backward_time=1.029, grad_norm=111.830, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.000e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 04:28:17,825 (trainer:338) INFO: 51epoch results: [train] iter_time=0.271, forward_time=0.150, loss_ctc=70.867, loss_att=54.317, acc=0.712, loss=59.282, backward_time=1.033, grad_norm=130.980, clip=100.000, loss_scale=2.538e+32, optim_step_time=0.182, optim0_lr0=5.025e-05, train_time=3.480, time=4 hours, 50 minutes and 9.54 seconds, total_count=480000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.872, cer_ctc=0.251, loss_att=38.738, acc=0.678, cer=0.404, wer=0.996, loss=39.978, time=6 minutes and 25.08 seconds, total_count=49082, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 49.35 seconds, total_count=0, gpu_max_cached_mem_GB=37.635 +[gpub001:0/64] 2023-07-15 04:28:33,383 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub001:0/64] 2023-07-15 04:28:33,396 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/46epoch.pth +[gpub001:0/64] 2023-07-15 04:28:33,396 (trainer:272) INFO: 52/60epoch started. Estimated time to finish: 1 day, 20 hours and 51 minutes +[gpub001:0/64] 2023-07-15 04:28:33,399 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
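# --- Aside: where "Estimated time to finish" comes from ----------------------
# Epoch 51 cost ~4 h 50 m of training plus ~6.4 m validation and ~5.8 m of
# attention plotting, and 9 of 60 epochs remain (52..60), so a per-epoch
# extrapolation lands close to the logged "1 day, 20 hours and 51 minutes";
# the trainer presumably averages over more than just the last epoch, hence
# the small gap. Back-of-the-envelope check:

per_epoch_s = (4 * 3600 + 50 * 60 + 9.54) + (6 * 60 + 25.08) + (5 * 60 + 49.35)
eta_h = (60 - 51) * per_epoch_s / 3600
print(f"{eta_h:.1f} h")  # -> 45.4 h ~= 1 day 21 h, vs. logged 1 day 20 h 51 m
# ------------------------------------------------------------------------------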
+[gpub001:0/64] 2023-07-15 04:28:51,055 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 04:28:54,528 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 04:28:54,528 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 04:28:54,534 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 04:35:34,681 (trainer:732) INFO: 52epoch:train:1-100batch: iter_time=2.800, forward_time=0.165, loss_ctc=69.950, loss_att=52.193, acc=0.709, loss=57.520, backward_time=1.039, grad_norm=115.030, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.000e-05, train_time=8.425 +[gpub001:0/64] 2023-07-15 04:37:51,420 (trainer:732) INFO: 52epoch:train:101-200batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=69.058, loss_att=52.313, acc=0.715, loss=57.337, backward_time=1.032, grad_norm=143.837, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.999e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 04:40:09,108 (trainer:732) INFO: 52epoch:train:201-300batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=69.512, loss_att=51.083, acc=0.718, loss=56.611, backward_time=1.032, grad_norm=136.816, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.999e-05, train_time=2.754 +[gpub001:0/64] 2023-07-15 04:42:29,211 (trainer:732) INFO: 52epoch:train:301-400batch: iter_time=1.408e-04, forward_time=0.146, loss_ctc=77.821, loss_att=61.168, acc=0.703, loss=66.164, backward_time=1.035, grad_norm=151.115, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.998e-05, train_time=2.802 +[gpub001:0/64] 2023-07-15 04:44:49,281 (trainer:732) INFO: 52epoch:train:401-500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=59.969, loss_att=45.427, acc=0.721, loss=49.790, backward_time=1.032, grad_norm=126.820, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.998e-05, train_time=2.801 +[gpub001:0/64] 2023-07-15 04:47:08,640 (trainer:732) INFO: 52epoch:train:501-600batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=61.323, loss_att=48.328, acc=0.726, loss=52.227, backward_time=1.037, grad_norm=125.310, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.997e-05, train_time=2.787 +[gpub001:0/64] 2023-07-15 04:49:31,134 (trainer:732) INFO: 52epoch:train:601-700batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=75.112, loss_att=63.431, acc=0.711, loss=66.935, backward_time=1.054, grad_norm=142.319, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.997e-05, train_time=2.850 +[gpub001:0/64] 2023-07-15 04:51:49,592 (trainer:732) INFO: 52epoch:train:701-800batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=75.882, loss_att=61.266, acc=0.716, loss=65.651, backward_time=1.033, grad_norm=112.293, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.996e-05, 
train_time=2.769 +[gpub001:0/64] 2023-07-15 04:52:41,328 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub001:0/64] 2023-07-15 04:52:59,420 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 04:53:02,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 04:53:02,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 04:53:02,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 04:57:23,419 (trainer:732) INFO: 52epoch:train:801-900batch: iter_time=1.338, forward_time=0.237, loss_ctc=68.461, loss_att=51.350, acc=0.709, loss=56.483, backward_time=1.050, grad_norm=146.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.996e-05, train_time=6.676 +[gpub001:0/64] 2023-07-15 04:59:41,599 (trainer:732) INFO: 52epoch:train:901-1000batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=69.156, loss_att=58.040, acc=0.702, loss=61.375, backward_time=1.029, grad_norm=139.715, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.996e-05, train_time=2.764 +[gpub001:0/64] 2023-07-15 05:01:57,285 (trainer:732) INFO: 52epoch:train:1001-1100batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=71.143, loss_att=50.487, acc=0.721, loss=56.684, backward_time=1.029, grad_norm=117.836, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.995e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 05:04:13,647 (trainer:732) INFO: 52epoch:train:1101-1200batch: iter_time=1.046e-04, forward_time=0.147, loss_ctc=74.174, loss_att=57.692, acc=0.712, loss=62.637, backward_time=1.030, grad_norm=138.130, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.995e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 05:06:29,733 (trainer:732) INFO: 52epoch:train:1201-1300batch: iter_time=1.114e-04, forward_time=0.147, loss_ctc=57.362, loss_att=42.969, acc=0.723, loss=47.287, backward_time=1.029, grad_norm=118.350, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.994e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 05:08:45,836 (trainer:732) INFO: 52epoch:train:1301-1400batch: iter_time=1.205e-04, forward_time=0.149, loss_ctc=65.213, loss_att=52.131, acc=0.732, loss=56.055, backward_time=1.030, grad_norm=140.121, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.994e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 05:11:02,183 (trainer:732) INFO: 52epoch:train:1401-1500batch: iter_time=1.198e-04, forward_time=0.149, loss_ctc=76.107, loss_att=62.021, acc=0.715, loss=66.247, backward_time=1.032, grad_norm=127.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.993e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 05:13:18,216 (trainer:732) INFO: 52epoch:train:1501-1600batch: iter_time=1.083e-04, forward_time=0.148, loss_ctc=66.997, loss_att=54.395, acc=0.727, loss=58.175, 
backward_time=1.030, grad_norm=117.246, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.993e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 05:14:59,026 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub001:0/64] 2023-07-15 05:15:17,049 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 05:15:20,571 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 05:15:20,571 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub001:0/64] 2023-07-15 05:15:20,578 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 05:22:59,109 (trainer:732) INFO: 52epoch:train:1601-1700batch: iter_time=4.399, forward_time=0.169, loss_ctc=76.666, loss_att=57.348, acc=0.713, loss=63.143, backward_time=1.040, grad_norm=154.498, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.992e-05, train_time=11.617 +[gpub001:0/64] 2023-07-15 05:25:16,028 (trainer:732) INFO: 52epoch:train:1701-1800batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=68.875, loss_att=53.803, acc=0.711, loss=58.325, backward_time=1.031, grad_norm=128.396, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.992e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 05:27:31,940 (trainer:732) INFO: 52epoch:train:1801-1900batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=72.358, loss_att=52.034, acc=0.719, loss=58.132, backward_time=1.028, grad_norm=128.456, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.991e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 05:29:52,478 (trainer:732) INFO: 52epoch:train:1901-2000batch: iter_time=1.215e-04, forward_time=0.147, loss_ctc=74.207, loss_att=56.605, acc=0.713, loss=61.886, backward_time=1.047, grad_norm=146.145, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.991e-05, train_time=2.811 +[gpub001:0/64] 2023-07-15 05:32:13,571 (trainer:732) INFO: 52epoch:train:2001-2100batch: iter_time=1.101e-04, forward_time=0.146, loss_ctc=60.906, loss_att=47.967, acc=0.726, loss=51.849, backward_time=1.035, grad_norm=150.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.990e-05, train_time=2.822 +[gpub001:0/64] 2023-07-15 05:34:34,015 (trainer:732) INFO: 52epoch:train:2101-2200batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=59.010, loss_att=46.764, acc=0.727, loss=50.438, backward_time=1.032, grad_norm=205.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.990e-05, train_time=2.809 +[gpub001:0/64] 2023-07-15 05:37:07,034 (trainer:732) INFO: 52epoch:train:2201-2300batch: iter_time=0.005, forward_time=0.207, loss_ctc=72.052, loss_att=58.747, acc=0.714, loss=62.738, backward_time=1.065, grad_norm=153.377, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.212, optim0_lr0=4.989e-05, train_time=3.058 +[gpub001:0/64] 2023-07-15 05:39:24,955 (trainer:732) INFO: 
52epoch:train:2301-2400batch: iter_time=1.118e-04, forward_time=0.148, loss_ctc=77.542, loss_att=63.924, acc=0.722, loss=68.010, backward_time=1.031, grad_norm=138.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.989e-05, train_time=2.760 +[gpub001:0/64] 2023-07-15 05:41:45,240 (trainer:732) INFO: 52epoch:train:2401-2500batch: iter_time=1.128e-04, forward_time=0.144, loss_ctc=69.658, loss_att=49.632, acc=0.723, loss=55.639, backward_time=1.028, grad_norm=121.430, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.988e-05, train_time=2.805 +[gpub001:0/64] 2023-07-15 05:41:58,122 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub001:0/64] 2023-07-15 05:42:16,233 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 05:42:19,646 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 05:42:19,646 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 05:42:19,652 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 05:47:06,685 (trainer:732) INFO: 52epoch:train:2501-2600batch: iter_time=1.721, forward_time=0.145, loss_ctc=68.445, loss_att=51.045, acc=0.711, loss=56.265, backward_time=1.045, grad_norm=122.585, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.988e-05, train_time=6.429 +[gpub001:0/64] 2023-07-15 05:49:23,184 (trainer:732) INFO: 52epoch:train:2601-2700batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=68.564, loss_att=53.136, acc=0.713, loss=57.764, backward_time=1.031, grad_norm=117.186, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.987e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 05:51:38,947 (trainer:732) INFO: 52epoch:train:2701-2800batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=68.961, loss_att=48.677, acc=0.721, loss=54.762, backward_time=1.027, grad_norm=134.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.987e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 05:53:54,877 (trainer:732) INFO: 52epoch:train:2801-2900batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=77.652, loss_att=61.174, acc=0.707, loss=66.117, backward_time=1.028, grad_norm=169.753, clip=100.000, loss_scale=3.829e+32, optim_step_time=0.181, optim0_lr0=4.986e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 05:56:10,325 (trainer:732) INFO: 52epoch:train:2901-3000batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=59.198, loss_att=43.540, acc=0.729, loss=48.237, backward_time=1.026, grad_norm=130.238, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.986e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 05:58:26,241 (trainer:732) INFO: 52epoch:train:3001-3100batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=61.246, loss_att=47.337, acc=0.732, loss=51.510, backward_time=1.029, grad_norm=113.002, clip=100.000, 
loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.985e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:00:42,516 (trainer:732) INFO: 52epoch:train:3101-3200batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=73.840, loss_att=62.268, acc=0.715, loss=65.740, backward_time=1.030, grad_norm=132.126, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.985e-05, train_time=2.725 +[gpub001:0/64] 2023-07-15 06:02:01,422 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub001:0/64] 2023-07-15 06:02:58,632 (trainer:732) INFO: 52epoch:train:3201-3300batch: iter_time=1.225e-04, forward_time=0.147, loss_ctc=72.796, loss_att=59.618, acc=0.723, loss=63.572, backward_time=1.031, grad_norm=118.588, clip=100.000, loss_scale=5.100e+32, optim_step_time=0.182, optim0_lr0=4.984e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 06:03:46,609 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 06:04:04,946 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:04:08,402 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:04:08,402 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 06:04:08,408 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:08:26,592 (trainer:732) INFO: 52epoch:train:3301-3400batch: iter_time=1.265, forward_time=0.146, loss_ctc=67.963, loss_att=50.761, acc=0.709, loss=55.921, backward_time=1.042, grad_norm=138.942, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.984e-05, train_time=6.559 +[gpub001:0/64] 2023-07-15 06:10:43,347 (trainer:732) INFO: 52epoch:train:3401-3500batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=68.987, loss_att=57.616, acc=0.701, loss=61.027, backward_time=1.030, grad_norm=121.698, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.983e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 06:12:59,043 (trainer:732) INFO: 52epoch:train:3501-3600batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=70.640, loss_att=49.445, acc=0.720, loss=55.804, backward_time=1.027, grad_norm=163.147, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.983e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 06:15:17,290 (trainer:732) INFO: 52epoch:train:3601-3700batch: iter_time=1.213e-04, forward_time=0.146, loss_ctc=72.309, loss_att=57.367, acc=0.701, loss=61.849, backward_time=1.036, grad_norm=192.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.982e-05, train_time=2.765 +[gpub001:0/64] 2023-07-15 06:17:32,908 (trainer:732) INFO: 52epoch:train:3701-3800batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=56.135, loss_att=43.432, acc=0.725, loss=47.242, backward_time=1.027, grad_norm=120.250, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.982e-05, train_time=2.712 
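# --- Aside: the recurring "Building Nth iter-factory" blocks -----------------
# The training data was pre-split into 12 shards (splits12/.../split.K), and
# each "iter-factory" build stands up a fresh iterator over one shard; within
# epoch 52 the shards arrive in shuffled order (split.7, .9, .11, .1, .8, ...).
# The first 100-batch window after each build absorbs the construction cost,
# which is why iter_time jumps from ~1e-4 s to seconds and train_time spikes
# to 6-11 vs. the steady ~2.7 there. Illustrative sketch of such a shard
# rotation (names and shuffling details are assumptions, not ESPnet code):

import random
from typing import Iterator, List

def shard_order(num_shards: int, epoch: int, seed: int = 0) -> List[int]:
    """Deterministic per-epoch shuffle of shard indices."""
    order = list(range(num_shards))
    random.Random(seed + epoch).shuffle(order)
    return order

def epoch_shards(num_shards: int, epoch: int) -> Iterator[str]:
    for k in shard_order(num_shards, epoch):
        yield f"splits12/wav.scp/split.{k}"  # a new iterator is built here
# ------------------------------------------------------------------------------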
+[gpub001:0/64] 2023-07-15 06:19:48,547 (trainer:732) INFO: 52epoch:train:3801-3900batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=64.984, loss_att=53.140, acc=0.717, loss=56.693, backward_time=1.026, grad_norm=124.613, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.981e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 06:22:04,604 (trainer:732) INFO: 52epoch:train:3901-4000batch: iter_time=1.046e-04, forward_time=0.146, loss_ctc=75.460, loss_att=63.239, acc=0.704, loss=66.905, backward_time=1.030, grad_norm=124.785, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.981e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 06:24:20,473 (trainer:732) INFO: 52epoch:train:4001-4100batch: iter_time=1.162e-04, forward_time=0.145, loss_ctc=68.466, loss_att=55.208, acc=0.726, loss=59.186, backward_time=1.028, grad_norm=118.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.980e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 06:25:55,117 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 06:26:13,418 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:26:16,916 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:26:16,916 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 06:26:16,923 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:30:23,969 (trainer:732) INFO: 52epoch:train:4101-4200batch: iter_time=1.316, forward_time=0.182, loss_ctc=70.188, loss_att=50.115, acc=0.713, loss=56.137, backward_time=1.038, grad_norm=155.015, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=4.980e-05, train_time=7.268 +[gpub001:0/64] 2023-07-15 06:32:52,217 (trainer:732) INFO: 52epoch:train:4201-4300batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=69.316, loss_att=51.445, acc=0.717, loss=56.806, backward_time=1.043, grad_norm=137.794, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.979e-05, train_time=2.966 +[gpub001:0/64] 2023-07-15 06:35:08,342 (trainer:732) INFO: 52epoch:train:4301-4400batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=72.977, loss_att=57.668, acc=0.705, loss=62.261, backward_time=1.029, grad_norm=141.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.979e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 06:37:26,814 (trainer:732) INFO: 52epoch:train:4401-4500batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=63.643, loss_att=45.419, acc=0.719, loss=50.886, backward_time=1.056, grad_norm=113.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.978e-05, train_time=2.769 +[gpub001:0/64] 2023-07-15 06:39:57,698 (trainer:732) INFO: 52epoch:train:4501-4600batch: iter_time=1.134e-04, forward_time=0.147, loss_ctc=75.027, loss_att=58.300, acc=0.701, loss=63.318, 
backward_time=1.043, grad_norm=180.474, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.978e-05, train_time=3.017 +[gpub001:0/64] 2023-07-15 06:42:13,491 (trainer:732) INFO: 52epoch:train:4601-4700batch: iter_time=1.131e-04, forward_time=0.147, loss_ctc=56.007, loss_att=43.374, acc=0.727, loss=47.164, backward_time=1.027, grad_norm=140.554, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.977e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 06:44:29,393 (trainer:732) INFO: 52epoch:train:4701-4800batch: iter_time=1.151e-04, forward_time=0.147, loss_ctc=67.817, loss_att=52.699, acc=0.717, loss=57.235, backward_time=1.029, grad_norm=125.462, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.977e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:46:45,318 (trainer:732) INFO: 52epoch:train:4801-4900batch: iter_time=1.277e-04, forward_time=0.146, loss_ctc=73.382, loss_att=62.579, acc=0.709, loss=65.820, backward_time=1.029, grad_norm=138.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.976e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:49:01,133 (trainer:732) INFO: 52epoch:train:4901-5000batch: iter_time=1.282e-04, forward_time=0.148, loss_ctc=67.029, loss_att=51.868, acc=0.723, loss=56.416, backward_time=1.028, grad_norm=126.438, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.976e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 06:49:16,564 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub001:0/64] 2023-07-15 06:49:34,812 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:49:38,218 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:49:38,218 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-15 06:49:38,224 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:55:32,657 (trainer:732) INFO: 52epoch:train:5001-5100batch: iter_time=2.462, forward_time=0.167, loss_ctc=69.941, loss_att=52.394, acc=0.707, loss=57.658, backward_time=1.039, grad_norm=123.555, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.975e-05, train_time=7.830 +[gpub001:0/64] 2023-07-15 06:57:48,838 (trainer:732) INFO: 52epoch:train:5101-5200batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=67.509, loss_att=53.022, acc=0.712, loss=57.368, backward_time=1.027, grad_norm=124.915, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.975e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 07:00:08,648 (trainer:732) INFO: 52epoch:train:5201-5300batch: iter_time=1.262e-04, forward_time=0.145, loss_ctc=66.544, loss_att=46.869, acc=0.728, loss=52.771, backward_time=1.037, grad_norm=123.961, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.974e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 07:02:28,465 (trainer:732) INFO: 
52epoch:train:5301-5400batch: iter_time=1.234e-04, forward_time=0.147, loss_ctc=76.176, loss_att=59.538, acc=0.700, loss=64.529, backward_time=1.041, grad_norm=207.376, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.974e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 07:04:57,605 (trainer:732) INFO: 52epoch:train:5401-5500batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=59.021, loss_att=44.631, acc=0.724, loss=48.948, backward_time=1.045, grad_norm=134.157, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.973e-05, train_time=2.983 +[gpub001:0/64] 2023-07-15 07:07:19,312 (trainer:732) INFO: 52epoch:train:5501-5600batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=60.113, loss_att=46.606, acc=0.727, loss=50.658, backward_time=1.044, grad_norm=107.221, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.973e-05, train_time=2.834 +[gpub001:0/64] 2023-07-15 07:09:42,242 (trainer:732) INFO: 52epoch:train:5601-5700batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=74.213, loss_att=63.764, acc=0.702, loss=66.899, backward_time=1.047, grad_norm=124.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.972e-05, train_time=2.858 +[gpub001:0/64] 2023-07-15 07:11:58,361 (trainer:732) INFO: 52epoch:train:5701-5800batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=72.924, loss_att=58.764, acc=0.720, loss=63.012, backward_time=1.029, grad_norm=109.159, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.972e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 07:12:46,518 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub001:0/64] 2023-07-15 07:13:04,771 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:13:08,290 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:13:08,290 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 07:13:08,297 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 07:17:17,109 (trainer:732) INFO: 52epoch:train:5801-5900batch: iter_time=1.320, forward_time=0.194, loss_ctc=67.235, loss_att=50.707, acc=0.707, loss=55.665, backward_time=1.041, grad_norm=126.723, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.971e-05, train_time=6.375 +[gpub001:0/64] 2023-07-15 07:19:34,139 (trainer:732) INFO: 52epoch:train:5901-6000batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=67.571, loss_att=55.834, acc=0.707, loss=59.355, backward_time=1.028, grad_norm=119.801, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.971e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 07:21:50,446 (trainer:732) INFO: 52epoch:train:6001-6100batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=69.750, loss_att=48.735, acc=0.721, loss=55.039, backward_time=1.028, grad_norm=137.401, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.970e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 07:24:05,901 (trainer:732) INFO: 52epoch:train:6101-6200batch: iter_time=1.260e-04, forward_time=0.143, loss_ctc=74.044, loss_att=56.325, acc=0.704, loss=61.641, backward_time=1.026, grad_norm=136.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.970e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 07:26:21,636 (trainer:732) INFO: 52epoch:train:6201-6300batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=55.855, loss_att=43.352, acc=0.724, loss=47.103, backward_time=1.028, grad_norm=115.672, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.969e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 07:28:42,756 (trainer:732) INFO: 52epoch:train:6301-6400batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=62.969, loss_att=51.098, acc=0.723, loss=54.659, backward_time=1.046, grad_norm=130.164, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.969e-05, train_time=2.822 +[gpub001:0/64] 2023-07-15 07:31:00,939 (trainer:732) INFO: 52epoch:train:6401-6500batch: iter_time=1.146e-04, forward_time=0.147, loss_ctc=75.551, loss_att=63.770, acc=0.701, loss=67.304, backward_time=1.032, grad_norm=129.815, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.968e-05, train_time=2.763 +[gpub001:0/64] 2023-07-15 07:33:28,577 (trainer:732) INFO: 52epoch:train:6501-6600batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=68.149, loss_att=54.330, acc=0.725, loss=58.476, backward_time=1.036, grad_norm=113.777, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.968e-05, train_time=2.953 +[gpub001:0/64] 2023-07-15 07:35:03,598 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
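# --- Aside: the identical sampler summaries ----------------------------------
# Every shard reports UnsortedBatchSampler(N-batch=37994, batch_size=128) and
# "mean=128.0, min=128, max=129", i.e. roughly 37994 * 128 ~= 4.86 M
# utterances per shard, with the division remainder spread over a few batches
# (hence max=129). Below is a splitting rule that reproduces the logged
# summary; whether the sampler does exactly this is not shown in the log, and
# the exact shard size is not logged either, so 50 is a made-up remainder:

def batch_sizes(num_utts: int, batch_size: int = 128) -> list:
    n_batch = num_utts // batch_size   # 37994 in this log
    rem = num_utts % batch_size        # handed out one-per-batch up front
    return [batch_size + (1 if i < rem else 0) for i in range(n_batch)]

sizes = batch_sizes(37994 * 128 + 50)
assert len(sizes) == 37994 and min(sizes) == 128 and max(sizes) == 129
# ------------------------------------------------------------------------------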
+[gpub001:0/64] 2023-07-15 07:35:21,711 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:35:25,095 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:35:25,095 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub001:0/64] 2023-07-15 07:35:25,101 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 07:39:15,653 (trainer:732) INFO: 52epoch:train:6601-6700batch: iter_time=1.307, forward_time=0.166, loss_ctc=70.728, loss_att=49.460, acc=0.717, loss=55.840, backward_time=1.039, grad_norm=123.671, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.967e-05, train_time=6.941 +[gpub001:0/64] 2023-07-15 07:41:33,326 (trainer:732) INFO: 52epoch:train:6701-6800batch: iter_time=1.170e-04, forward_time=0.145, loss_ctc=68.471, loss_att=51.019, acc=0.714, loss=56.255, backward_time=1.034, grad_norm=118.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.967e-05, train_time=2.754 +[gpub001:0/64] 2023-07-15 07:43:49,511 (trainer:732) INFO: 52epoch:train:6801-6900batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=72.102, loss_att=56.769, acc=0.710, loss=61.369, backward_time=1.028, grad_norm=153.381, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.966e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 07:46:08,170 (trainer:732) INFO: 52epoch:train:6901-7000batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=62.933, loss_att=45.330, acc=0.724, loss=50.611, backward_time=1.039, grad_norm=115.571, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.966e-05, train_time=2.773 +[gpub001:0/64] 2023-07-15 07:48:28,609 (trainer:732) INFO: 52epoch:train:7001-7100batch: iter_time=1.199e-04, forward_time=0.147, loss_ctc=76.028, loss_att=57.030, acc=0.707, loss=62.729, backward_time=1.034, grad_norm=162.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.965e-05, train_time=2.809 +[gpub001:0/64] 2023-07-15 07:50:46,730 (trainer:732) INFO: 52epoch:train:7101-7200batch: iter_time=1.191e-04, forward_time=0.146, loss_ctc=56.737, loss_att=43.740, acc=0.729, loss=47.639, backward_time=1.031, grad_norm=117.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.965e-05, train_time=2.762 +[gpub001:0/64] 2023-07-15 07:53:15,548 (trainer:732) INFO: 52epoch:train:7201-7300batch: iter_time=1.316e-04, forward_time=0.146, loss_ctc=67.374, loss_att=53.427, acc=0.716, loss=57.611, backward_time=1.037, grad_norm=133.942, clip=100.000, loss_scale=4.608e+32, optim_step_time=0.182, optim0_lr0=4.964e-05, train_time=2.976 +[gpub001:0/64] 2023-07-15 07:55:26,256 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
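# --- Aside: grad_norm vs. clip=100.000 ---------------------------------------
# clip=100.000 in every record is the max-norm used for gradient clipping,
# while the logged grad_norm frequently exceeds it (e.g. 205.576, 207.376):
# the value logged is the total norm *before* clipping, after which gradients
# are rescaled down to norm 100 for the update. This matches the semantics of
# torch.nn.utils.clip_grad_norm_, which returns the pre-clip norm. Sketch:

import torch

def clip_and_log(model: torch.nn.Module, max_norm: float = 100.0) -> float:
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    return float(grad_norm)  # the number a trainer would report as grad_norm

lin = torch.nn.Linear(4, 4)
lin(torch.randn(8, 4)).sum().backward()
print(clip_and_log(lin))     # pre-clip norm; lin's grads now have norm <= 100
# ------------------------------------------------------------------------------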
+[gpub001:0/64] 2023-07-15 07:55:39,865 (trainer:732) INFO: 52epoch:train:7301-7400batch: iter_time=1.232e-04, forward_time=0.148, loss_ctc=73.022, loss_att=61.882, acc=0.708, loss=65.224, backward_time=1.056, grad_norm=127.732, clip=100.000, loss_scale=6.159e+32, optim_step_time=0.182, optim0_lr0=4.964e-05, train_time=2.886 +[gpub001:0/64] 2023-07-15 07:58:00,543 (trainer:732) INFO: 52epoch:train:7401-7500batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=65.887, loss_att=51.810, acc=0.721, loss=56.033, backward_time=1.031, grad_norm=112.643, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.963e-05, train_time=2.813 +[gpub001:0/64] 2023-07-15 07:58:11,614 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 07:58:29,689 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:58:33,138 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:58:33,138 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 07:58:33,144 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 08:04:38,481 (trainer:732) INFO: 52epoch:train:7501-7600batch: iter_time=2.524, forward_time=0.173, loss_ctc=68.274, loss_att=52.729, acc=0.716, loss=57.393, backward_time=1.043, grad_norm=122.527, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.963e-05, train_time=7.958 +[gpub001:0/64] 2023-07-15 08:06:55,378 (trainer:732) INFO: 52epoch:train:7601-7700batch: iter_time=1.197e-04, forward_time=0.150, loss_ctc=66.701, loss_att=51.740, acc=0.717, loss=56.228, backward_time=1.030, grad_norm=132.874, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.962e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 08:09:14,728 (trainer:732) INFO: 52epoch:train:7701-7800batch: iter_time=1.194e-04, forward_time=0.163, loss_ctc=66.481, loss_att=47.413, acc=0.730, loss=53.133, backward_time=1.030, grad_norm=123.853, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.201, optim0_lr0=4.962e-05, train_time=2.787 +[gpub001:0/64] 2023-07-15 08:11:09,763 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-15 08:11:31,528 (trainer:732) INFO: 52epoch:train:7801-7900batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=76.969, loss_att=61.099, acc=0.712, loss=65.860, backward_time=1.030, grad_norm=147.788, clip=100.000, loss_scale=2.980e+32, optim_step_time=0.182, optim0_lr0=4.961e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 08:13:47,018 (trainer:732) INFO: 52epoch:train:7901-8000batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=58.050, loss_att=42.970, acc=0.735, loss=47.494, backward_time=1.025, grad_norm=116.421, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.961e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 08:16:02,954 (trainer:732) INFO: 52epoch:train:8001-8100batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=60.651, loss_att=47.371, acc=0.734, loss=51.355, backward_time=1.027, grad_norm=123.406, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.960e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 08:18:19,062 (trainer:732) INFO: 52epoch:train:8101-8200batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=72.709, loss_att=62.133, acc=0.722, loss=65.306, backward_time=1.028, grad_norm=112.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.960e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 08:20:35,779 (trainer:732) INFO: 52epoch:train:8201-8300batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=72.592, loss_att=59.622, acc=0.726, loss=63.513, backward_time=1.028, grad_norm=112.657, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.960e-05, train_time=2.734 +[gpub001:0/64] 2023-07-15 08:21:28,081 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 08:21:46,305 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 08:21:50,018 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 08:21:50,018 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub001:0/64] 2023-07-15 08:21:50,025 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 08:27:03,659 (trainer:732) INFO: 52epoch:train:8301-8400batch: iter_time=1.383, forward_time=0.161, loss_ctc=63.582, loss_att=46.401, acc=0.711, loss=51.555, backward_time=1.043, grad_norm=110.495, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.959e-05, train_time=7.757 +[gpub001:0/64] 2023-07-15 08:29:20,503 (trainer:732) INFO: 52epoch:train:8401-8500batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=71.706, loss_att=52.790, acc=0.718, loss=58.465, backward_time=1.029, grad_norm=132.484, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.959e-05, train_time=2.737 +[gpub001:0/64] 2023-07-15 08:31:36,508 (trainer:732) INFO: 52epoch:train:8501-8600batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=69.582, loss_att=53.043, acc=0.721, loss=58.005, 
backward_time=1.030, grad_norm=129.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.958e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 08:33:53,300 (trainer:732) INFO: 52epoch:train:8601-8700batch: iter_time=1.138e-04, forward_time=0.146, loss_ctc=63.441, loss_att=46.306, acc=0.719, loss=51.447, backward_time=1.030, grad_norm=134.435, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.958e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 08:36:12,547 (trainer:732) INFO: 52epoch:train:8701-8800batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=72.367, loss_att=57.240, acc=0.706, loss=61.778, backward_time=1.040, grad_norm=144.562, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.957e-05, train_time=2.785 +[gpub001:0/64] 2023-07-15 08:38:28,158 (trainer:732) INFO: 52epoch:train:8801-8900batch: iter_time=1.144e-04, forward_time=0.146, loss_ctc=54.536, loss_att=41.153, acc=0.739, loss=45.168, backward_time=1.028, grad_norm=114.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.957e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 08:40:58,526 (trainer:732) INFO: 52epoch:train:8901-9000batch: iter_time=1.149e-04, forward_time=0.147, loss_ctc=69.535, loss_att=56.601, acc=0.709, loss=60.481, backward_time=1.044, grad_norm=139.440, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.956e-05, train_time=3.007 +[gpub001:0/64] 2023-07-15 08:43:14,542 (trainer:732) INFO: 52epoch:train:9001-9100batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=76.360, loss_att=63.389, acc=0.715, loss=67.281, backward_time=1.029, grad_norm=114.369, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.956e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 08:44:48,439 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
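# --- Aside: the slowly decaying optim0_lr0 -----------------------------------
# optim0_lr0 drifts from 5.020e-05 down to ~4.95e-05 across this section. With
# the base lr 2.5e-4 and 10k warmup steps encoded in the experiment name, this
# matches an inverse-square-root warmup schedule of the form
#     lr(step) = base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5),
# which past warmup decays as base_lr * sqrt(warmup / step). Back-solving,
# lr = 5.000e-05 (end of epoch 51 above) corresponds to ~250k optimizer steps;
# how that maps onto total_count=480000 depends on gradient accumulation,
# which this log does not show. The configured scheduler lives in the YAML,
# not the log; the function below is a sketch of that formula:

def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup: int = 10_000) -> float:
    return base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5)

assert abs(warmup_lr(250_000) - 5.000e-5) < 1e-9
# ------------------------------------------------------------------------------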
+[gpub001:0/64] 2023-07-15 08:45:06,514 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 08:45:09,993 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 08:45:09,993 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 08:45:09,999 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 08:50:38,705 (trainer:732) INFO: 52epoch:train:9101-9200batch: iter_time=1.386, forward_time=0.204, loss_ctc=64.693, loss_att=46.910, acc=0.724, loss=52.245, backward_time=1.042, grad_norm=132.581, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.955e-05, train_time=8.882
+[gpub001:0/64] 2023-07-15 08:52:56,341 (trainer:732) INFO: 52epoch:train:9201-9300batch: iter_time=1.218e-04, forward_time=0.149, loss_ctc=67.583, loss_att=50.815, acc=0.725, loss=55.846, backward_time=1.032, grad_norm=120.075, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.955e-05, train_time=2.753
+[gpub001:0/64] 2023-07-15 08:55:13,695 (trainer:732) INFO: 52epoch:train:9301-9400batch: iter_time=1.130e-04, forward_time=0.149, loss_ctc=72.507, loss_att=57.099, acc=0.717, loss=61.721, backward_time=1.030, grad_norm=148.100, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.954e-05, train_time=2.747
+[gpub001:0/64] 2023-07-15 08:57:29,816 (trainer:732) INFO: 52epoch:train:9401-9500batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=61.723, loss_att=44.944, acc=0.730, loss=49.978, backward_time=1.028, grad_norm=107.006, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.954e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 08:59:46,404 (trainer:732) INFO: 52epoch:train:9501-9600batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=73.882, loss_att=57.515, acc=0.715, loss=62.425, backward_time=1.031, grad_norm=134.396, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.953e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 09:02:02,343 (trainer:732) INFO: 52epoch:train:9601-9700batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=57.834, loss_att=43.892, acc=0.733, loss=48.075, backward_time=1.027, grad_norm=105.315, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.953e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 09:04:18,628 (trainer:732) INFO: 52epoch:train:9701-9800batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=69.046, loss_att=55.521, acc=0.724, loss=59.578, backward_time=1.028, grad_norm=134.539, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.952e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 09:06:40,864 (trainer:732) INFO: 52epoch:train:9801-9900batch: iter_time=1.086e-04, forward_time=0.146, loss_ctc=71.846, loss_att=59.131, acc=0.721, loss=62.945, backward_time=1.034, grad_norm=118.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.952e-05, train_time=2.844
+[gpub001:0/64] 2023-07-15 09:08:57,505 (trainer:732) INFO: 52epoch:train:9901-10000batch: iter_time=1.061e-04, forward_time=0.148, loss_ctc=67.461, loss_att=52.187, acc=0.727, loss=56.769, backward_time=1.031, grad_norm=132.078, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.951e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 09:23:19,047 (trainer:338) INFO: 52epoch results: [train] iter_time=0.232, forward_time=0.151, loss_ctc=68.405, loss_att=53.118, acc=0.717, loss=57.704, backward_time=1.034, grad_norm=132.000, clip=100.000, loss_scale=3.065e+32, optim_step_time=0.182, optim0_lr0=4.975e-05, train_time=3.365, time=4 hours, 40 minutes and 41.9 seconds, total_count=490000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=43.162, cer_ctc=0.252, loss_att=38.727, acc=0.677, cer=0.407, wer=0.996, loss=40.058, time=8 minutes and 9.5 seconds, total_count=50094, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 54.24 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 09:23:36,296 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 09:23:36,354 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/47epoch.pth
+[gpub001:0/64] 2023-07-15 09:23:36,354 (trainer:272) INFO: 53/60epoch started. Estimated time to finish: 1 day, 15 hours and 44 minutes
+[gpub001:0/64] 2023-07-15 09:23:37,760 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-15 09:23:55,704 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 09:23:59,027 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 09:23:59,027 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 09:23:59,041 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 09:31:30,451 (trainer:732) INFO: 53epoch:train:1-100batch: iter_time=3.311, forward_time=0.176, loss_ctc=77.249, loss_att=58.785, acc=0.708, loss=64.324, backward_time=1.043, grad_norm=155.630, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=4.951e-05, train_time=9.474
+[gpub001:0/64] 2023-07-15 09:33:55,969 (trainer:732) INFO: 53epoch:train:101-200batch: iter_time=1.288e-04, forward_time=0.189, loss_ctc=71.700, loss_att=52.162, acc=0.707, loss=58.023, backward_time=1.035, grad_norm=122.394, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.950e-05, train_time=2.911
+[gpub001:0/64] 2023-07-15 09:36:26,554 (trainer:732) INFO: 53epoch:train:201-300batch: iter_time=0.001, forward_time=0.236, loss_ctc=66.496, loss_att=46.521, acc=0.739, loss=52.513, backward_time=1.048, grad_norm=117.834, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.950e-05, train_time=3.011
+[gpub001:0/64] 2023-07-15 09:39:01,453 (trainer:732) INFO: 53epoch:train:301-400batch: iter_time=9.101e-04, forward_time=0.287, loss_ctc=75.211, loss_att=58.135, acc=0.711, loss=63.258, backward_time=1.053, grad_norm=108.586, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.190, optim0_lr0=4.949e-05, train_time=3.098
+[gpub001:0/64] 2023-07-15 09:41:30,212 (trainer:732) INFO: 53epoch:train:401-500batch: iter_time=3.592e-04, forward_time=0.238, loss_ctc=68.220, loss_att=49.582, acc=0.727, loss=55.173, backward_time=1.045, grad_norm=122.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=4.949e-05, train_time=2.975
+[gpub001:0/64] 2023-07-15 09:43:59,845 (trainer:732) INFO: 53epoch:train:501-600batch: iter_time=0.003, forward_time=0.238, loss_ctc=65.657, loss_att=47.631, acc=0.729, loss=53.039, backward_time=1.049, grad_norm=152.877, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=4.948e-05, train_time=2.992
+[gpub001:0/64] 2023-07-15 09:46:31,374 (trainer:732) INFO: 53epoch:train:601-700batch: iter_time=1.153e-04, forward_time=0.177, loss_ctc=72.494, loss_att=54.683, acc=0.710, loss=60.026, backward_time=1.055, grad_norm=148.168, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=4.948e-05, train_time=3.031
+[gpub001:0/64] 2023-07-15 09:49:00,984 (trainer:732) INFO: 53epoch:train:701-800batch: iter_time=6.123e-04, forward_time=0.231, loss_ctc=65.196, loss_att=47.527, acc=0.726, loss=52.828, backward_time=1.042, grad_norm=130.075, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.947e-05, train_time=2.992
+[gpub001:0/64] 2023-07-15 09:49:58,288 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 09:50:16,392 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 09:50:19,748 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 09:50:19,748 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 09:50:19,754 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 09:58:24,759 (trainer:732) INFO: 53epoch:train:801-900batch: iter_time=4.199, forward_time=0.197, loss_ctc=70.653, loss_att=50.295, acc=0.719, loss=56.402, backward_time=1.041, grad_norm=118.018, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.947e-05, train_time=11.275
+[gpub001:0/64] 2023-07-15 10:00:41,776 (trainer:732) INFO: 53epoch:train:901-1000batch: iter_time=1.306e-04, forward_time=0.151, loss_ctc=77.224, loss_att=55.334, acc=0.705, loss=61.901, backward_time=1.032, grad_norm=132.329, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.946e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 10:02:57,402 (trainer:732) INFO: 53epoch:train:1001-1100batch: iter_time=1.317e-04, forward_time=0.148, loss_ctc=64.572, loss_att=46.014, acc=0.728, loss=51.581, backward_time=1.027, grad_norm=123.422, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.946e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 10:05:13,259 (trainer:732) INFO: 53epoch:train:1101-1200batch: iter_time=1.391e-04, forward_time=0.148, loss_ctc=73.301, loss_att=55.843, acc=0.715, loss=61.080, backward_time=1.029, grad_norm=135.140, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.945e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 10:07:28,967 (trainer:732) INFO: 53epoch:train:1201-1300batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=63.739, loss_att=47.917, acc=0.729, loss=52.664, backward_time=1.028, grad_norm=114.091, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.945e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 10:09:44,862 (trainer:732) INFO: 53epoch:train:1301-1400batch: iter_time=1.252e-04, forward_time=0.149, loss_ctc=67.179, loss_att=47.097, acc=0.730, loss=53.122, backward_time=1.028, grad_norm=116.673, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.944e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 10:12:00,515 (trainer:732) INFO: 53epoch:train:1401-1500batch: iter_time=1.255e-04, forward_time=0.148, loss_ctc=68.130, loss_att=49.836, acc=0.714, loss=55.324, backward_time=1.027, grad_norm=129.206, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.944e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 10:14:17,608 (trainer:732) INFO: 53epoch:train:1501-1600batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=67.907, loss_att=53.286, acc=0.708, loss=57.672, backward_time=1.028, grad_norm=135.533, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 10:15:56,480 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 10:16:14,877 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 10:16:18,381 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 10:16:18,381 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 10:16:18,387 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 10:20:43,071 (trainer:732) INFO: 53epoch:train:1601-1700batch: iter_time=1.340, forward_time=0.148, loss_ctc=70.666, loss_att=56.816, acc=0.715, loss=60.971, backward_time=1.036, grad_norm=162.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=7.709
+[gpub001:0/64] 2023-07-15 10:22:59,760 (trainer:732) INFO: 53epoch:train:1701-1800batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=71.652, loss_att=52.533, acc=0.712, loss=58.269, backward_time=1.033, grad_norm=141.266, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 10:25:15,620 (trainer:732) INFO: 53epoch:train:1801-1900batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=76.945, loss_att=54.785, acc=0.701, loss=61.433, backward_time=1.028, grad_norm=120.255, clip=100.000, loss_scale=1.882e+32, optim_step_time=0.182, optim0_lr0=4.942e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 10:27:32,921 (trainer:732) INFO: 53epoch:train:1901-2000batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=65.557, loss_att=44.420, acc=0.733, loss=50.761, backward_time=1.026, grad_norm=170.277, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.942e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 10:29:54,896 (trainer:732) INFO: 53epoch:train:2001-2100batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=78.932, loss_att=60.005, acc=0.708, loss=65.683, backward_time=1.032, grad_norm=154.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.941e-05, train_time=2.839
+[gpub001:0/64] 2023-07-15 10:32:19,721 (trainer:732) INFO: 53epoch:train:2101-2200batch: iter_time=1.312e-04, forward_time=0.146, loss_ctc=65.440, loss_att=48.548, acc=0.725, loss=53.615, backward_time=1.039, grad_norm=135.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.941e-05, train_time=2.896
+[gpub001:0/64] 2023-07-15 10:34:37,291 (trainer:732) INFO: 53epoch:train:2201-2300batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=63.624, loss_att=44.869, acc=0.738, loss=50.495, backward_time=1.031, grad_norm=144.749, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.940e-05, train_time=2.751
+[gpub001:0/64] 2023-07-15 10:36:56,681 (trainer:732) INFO: 53epoch:train:2301-2400batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=71.643, loss_att=51.467, acc=0.712, loss=57.520, backward_time=1.028, grad_norm=180.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.940e-05, train_time=2.788
+[gpub001:0/64] 2023-07-15 10:39:14,003 (trainer:732) INFO: 53epoch:train:2401-2500batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=64.355, loss_att=47.519, acc=0.719, loss=52.570, backward_time=1.029, grad_norm=114.242, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.939e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 10:39:17,582 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 10:39:35,923 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 10:39:39,455 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 10:39:39,455 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 10:39:39,461 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 10:44:46,978 (trainer:732) INFO: 53epoch:train:2501-2600batch: iter_time=1.885, forward_time=0.176, loss_ctc=73.411, loss_att=53.483, acc=0.721, loss=59.461, backward_time=1.044, grad_norm=123.712, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.939e-05, train_time=6.659
+[gpub001:0/64] 2023-07-15 10:47:15,365 (trainer:732) INFO: 53epoch:train:2601-2700batch: iter_time=1.000e-04, forward_time=0.145, loss_ctc=75.773, loss_att=56.053, acc=0.699, loss=61.969, backward_time=1.046, grad_norm=116.565, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.938e-05, train_time=2.968
+[gpub001:0/64] 2023-07-15 10:49:45,331 (trainer:732) INFO: 53epoch:train:2701-2800batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=65.403, loss_att=45.525, acc=0.728, loss=51.488, backward_time=1.046, grad_norm=116.454, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.938e-05, train_time=2.999
+[gpub001:0/64] 2023-07-15 10:52:05,173 (trainer:732) INFO: 53epoch:train:2801-2900batch: iter_time=1.075e-04, forward_time=0.144, loss_ctc=73.294, loss_att=53.142, acc=0.721, loss=59.188, backward_time=1.038, grad_norm=148.478, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.937e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 10:54:39,743 (trainer:732) INFO: 53epoch:train:2901-3000batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=67.600, loss_att=51.971, acc=0.725, loss=56.660, backward_time=1.042, grad_norm=138.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.937e-05, train_time=3.091
+[gpub001:0/64] 2023-07-15 10:56:58,255 (trainer:732) INFO: 53epoch:train:3001-3100batch: iter_time=1.071e-04, forward_time=0.144, loss_ctc=65.833, loss_att=46.073, acc=0.732, loss=52.001, backward_time=1.031, grad_norm=112.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.936e-05, train_time=2.770
+[gpub001:0/64] 2023-07-15 10:59:19,483 (trainer:732) INFO: 53epoch:train:3101-3200batch: iter_time=1.012e-04, forward_time=0.145, loss_ctc=66.802, loss_att=49.191, acc=0.717, loss=54.475, backward_time=1.039, grad_norm=159.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.936e-05, train_time=2.824
+[gpub001:0/64] 2023-07-15 11:00:27,528 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 11:01:42,885 (trainer:732) INFO: 53epoch:train:3201-3300batch: iter_time=1.020e-04, forward_time=0.145, loss_ctc=69.338, loss_att=52.658, acc=0.720, loss=57.662, backward_time=1.040, grad_norm=112.301, clip=100.000, loss_scale=2.351e+32, optim_step_time=0.182, optim0_lr0=4.935e-05, train_time=2.868
+[gpub001:0/64] 2023-07-15 11:02:40,035 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 11:02:58,063 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:03:01,531 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:03:01,531 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 11:03:01,537 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:07:57,739 (trainer:732) INFO: 53epoch:train:3301-3400batch: iter_time=2.006, forward_time=0.145, loss_ctc=69.350, loss_att=48.327, acc=0.728, loss=54.634, backward_time=1.039, grad_norm=118.392, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.935e-05, train_time=7.497
+[gpub001:0/64] 2023-07-15 11:10:14,439 (trainer:732) INFO: 53epoch:train:3401-3500batch: iter_time=1.282e-04, forward_time=0.147, loss_ctc=77.528, loss_att=60.141, acc=0.707, loss=65.357, backward_time=1.032, grad_norm=137.504, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.934e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 11:12:30,025 (trainer:732) INFO: 53epoch:train:3501-3600batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=67.358, loss_att=47.997, acc=0.719, loss=53.805, backward_time=1.026, grad_norm=117.273, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.934e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 11:14:48,416 (trainer:732) INFO: 53epoch:train:3601-3700batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=73.623, loss_att=53.854, acc=0.738, loss=59.785, backward_time=1.029, grad_norm=143.444, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.933e-05, train_time=2.768
+[gpub001:0/64] 2023-07-15 11:17:21,226 (trainer:732) INFO: 53epoch:train:3701-3800batch: iter_time=1.341e-04, forward_time=0.146, loss_ctc=70.255, loss_att=50.718, acc=0.718, loss=56.579, backward_time=1.047, grad_norm=167.446, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.933e-05, train_time=3.056
+[gpub001:0/64] 2023-07-15 11:19:38,219 (trainer:732) INFO: 53epoch:train:3801-3900batch: iter_time=1.312e-04, forward_time=0.148, loss_ctc=65.971, loss_att=48.571, acc=0.738, loss=53.791, backward_time=1.029, grad_norm=126.572, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.187, optim0_lr0=4.932e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 11:21:58,735 (trainer:732) INFO: 53epoch:train:3901-4000batch: iter_time=1.312e-04, forward_time=0.147, loss_ctc=63.471, loss_att=47.210, acc=0.729, loss=52.089, backward_time=1.030, grad_norm=141.710, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.932e-05, train_time=2.810
+[gpub001:0/64] 2023-07-15 11:24:16,739 (trainer:732) INFO: 53epoch:train:4001-4100batch: iter_time=1.362e-04, forward_time=0.148, loss_ctc=71.783, loss_att=52.931, acc=0.721, loss=58.586, backward_time=1.028, grad_norm=117.704, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.931e-05, train_time=2.760
+[gpub001:0/64] 2023-07-15 11:25:58,082 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 11:26:16,209 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:26:19,575 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:26:19,575 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 11:26:19,581 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:31:12,862 (trainer:732) INFO: 53epoch:train:4101-4200batch: iter_time=1.396, forward_time=0.174, loss_ctc=67.126, loss_att=50.278, acc=0.730, loss=55.332, backward_time=1.038, grad_norm=122.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.931e-05, train_time=8.322
+[gpub001:0/64] 2023-07-15 11:33:32,008 (trainer:732) INFO: 53epoch:train:4201-4300batch: iter_time=1.106e-04, forward_time=0.146, loss_ctc=72.073, loss_att=52.426, acc=0.717, loss=58.320, backward_time=1.031, grad_norm=122.383, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.931e-05, train_time=2.783
+[gpub001:0/64] 2023-07-15 11:35:47,563 (trainer:732) INFO: 53epoch:train:4301-4400batch: iter_time=9.438e-05, forward_time=0.144, loss_ctc=75.067, loss_att=52.917, acc=0.710, loss=59.562, backward_time=1.028, grad_norm=140.286, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.930e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 11:38:03,264 (trainer:732) INFO: 53epoch:train:4401-4500batch: iter_time=1.051e-04, forward_time=0.146, loss_ctc=63.278, loss_att=44.313, acc=0.735, loss=50.003, backward_time=1.027, grad_norm=155.846, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.930e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 11:40:20,170 (trainer:732) INFO: 53epoch:train:4501-4600batch: iter_time=1.002e-04, forward_time=0.145, loss_ctc=75.680, loss_att=58.305, acc=0.715, loss=63.517, backward_time=1.032, grad_norm=150.291, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.929e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 11:42:36,442 (trainer:732) INFO: 53epoch:train:4601-4700batch: iter_time=1.032e-04, forward_time=0.144, loss_ctc=65.197, loss_att=48.186, acc=0.727, loss=53.289, backward_time=1.027, grad_norm=138.175, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.929e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 11:44:54,859 (trainer:732) INFO: 53epoch:train:4701-4800batch: iter_time=9.797e-05, forward_time=0.145, loss_ctc=64.764, loss_att=45.735, acc=0.737, loss=51.444, backward_time=1.029, grad_norm=220.036, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.928e-05, train_time=2.768
+[gpub001:0/64] 2023-07-15 11:47:16,838 (trainer:732) INFO: 53epoch:train:4801-4900batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=70.186, loss_att=49.914, acc=0.714, loss=55.995, backward_time=1.034, grad_norm=126.685, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.928e-05, train_time=2.839
+[gpub001:0/64] 2023-07-15 11:49:35,518 (trainer:732) INFO: 53epoch:train:4901-5000batch: iter_time=1.101e-04, forward_time=0.144, loss_ctc=63.751, loss_att=47.279, acc=0.720, loss=52.221, backward_time=1.036, grad_norm=128.634, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=2.773
+[gpub001:0/64] 2023-07-15 11:49:40,092 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-15 11:49:58,371 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:50:01,785 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:50:01,785 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-15 11:50:01,791 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:56:06,405 (trainer:732) INFO: 53epoch:train:5001-5100batch: iter_time=1.688, forward_time=0.158, loss_ctc=76.559, loss_att=57.521, acc=0.703, loss=63.233, backward_time=1.040, grad_norm=125.506, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=7.818
+[gpub001:0/64] 2023-07-15 11:58:22,014 (trainer:732) INFO: 53epoch:train:5101-5200batch: iter_time=1.050e-04, forward_time=0.145, loss_ctc=69.719, loss_att=48.716, acc=0.715, loss=55.017, backward_time=1.026, grad_norm=133.259, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.926e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 12:00:37,642 (trainer:732) INFO: 53epoch:train:5201-5300batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=66.930, loss_att=46.500, acc=0.734, loss=52.629, backward_time=1.027, grad_norm=125.915, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.926e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 12:02:53,478 (trainer:732) INFO: 53epoch:train:5301-5400batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=73.301, loss_att=55.694, acc=0.715, loss=60.976, backward_time=1.028, grad_norm=137.805, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.925e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 12:05:09,897 (trainer:732) INFO: 53epoch:train:5401-5500batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=67.056, loss_att=49.311, acc=0.723, loss=54.635, backward_time=1.028, grad_norm=120.485, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.925e-05, train_time=2.728
+[gpub001:0/64] 2023-07-15 12:07:26,024 (trainer:732) INFO: 53epoch:train:5501-5600batch: iter_time=1.192e-04, forward_time=0.147, loss_ctc=63.690, loss_att=46.252, acc=0.732, loss=51.484, backward_time=1.029, grad_norm=139.681, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.924e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 12:09:41,948 (trainer:732) INFO: 53epoch:train:5601-5700batch: iter_time=1.552e-04, forward_time=0.147, loss_ctc=70.104, loss_att=52.001, acc=0.712, loss=57.432, backward_time=1.028, grad_norm=107.690, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.924e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 12:11:58,251 (trainer:732) INFO: 53epoch:train:5701-5800batch: iter_time=1.338e-04, forward_time=0.148, loss_ctc=63.833, loss_att=47.985, acc=0.723, loss=52.739, backward_time=1.029, grad_norm=129.181, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.923e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 12:12:53,156 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 12:13:10,963 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 12:13:14,420 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 12:13:14,420 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 12:13:14,439 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 12:18:21,859 (trainer:732) INFO: 53epoch:train:5801-5900batch: iter_time=1.831, forward_time=0.146, loss_ctc=71.893, loss_att=49.578, acc=0.732, loss=56.273, backward_time=1.038, grad_norm=140.020, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.923e-05, train_time=7.672
+[gpub001:0/64] 2023-07-15 12:20:39,651 (trainer:732) INFO: 53epoch:train:5901-6000batch: iter_time=1.334e-04, forward_time=0.153, loss_ctc=77.718, loss_att=60.620, acc=0.708, loss=65.749, backward_time=1.031, grad_norm=115.613, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.922e-05, train_time=2.756
+[gpub001:0/64] 2023-07-15 12:22:55,460 (trainer:732) INFO: 53epoch:train:6001-6100batch: iter_time=1.329e-04, forward_time=0.147, loss_ctc=66.395, loss_att=47.215, acc=0.723, loss=52.969, backward_time=1.028, grad_norm=119.839, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.922e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:25:14,502 (trainer:732) INFO: 53epoch:train:6101-6200batch: iter_time=0.003, forward_time=0.146, loss_ctc=73.384, loss_att=54.398, acc=0.735, loss=60.094, backward_time=1.038, grad_norm=134.904, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.921e-05, train_time=2.781
+[gpub001:0/64] 2023-07-15 12:27:31,260 (trainer:732) INFO: 53epoch:train:6201-6300batch: iter_time=1.497e-04, forward_time=0.148, loss_ctc=68.480, loss_att=49.380, acc=0.721, loss=55.110, backward_time=1.029, grad_norm=133.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.921e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 12:29:47,555 (trainer:732) INFO: 53epoch:train:6301-6400batch: iter_time=9.751e-05, forward_time=0.147, loss_ctc=64.980, loss_att=48.650, acc=0.736, loss=53.549, backward_time=1.029, grad_norm=109.673, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.920e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 12:32:03,537 (trainer:732) INFO: 53epoch:train:6401-6500batch: iter_time=9.952e-05, forward_time=0.147, loss_ctc=64.249, loss_att=47.186, acc=0.732, loss=52.305, backward_time=1.029, grad_norm=131.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.920e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 12:34:25,293 (trainer:732) INFO: 53epoch:train:6501-6600batch: iter_time=6.416e-04, forward_time=0.158, loss_ctc=70.757, loss_att=53.176, acc=0.719, loss=58.450, backward_time=1.030, grad_norm=142.896, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.920e-05, train_time=2.835
+[gpub001:0/64] 2023-07-15 12:36:07,690 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 12:36:25,678 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 12:36:29,157 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 12:36:29,157 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 12:36:29,164 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 12:42:08,942 (trainer:732) INFO: 53epoch:train:6601-6700batch: iter_time=1.626, forward_time=0.195, loss_ctc=66.551, loss_att=48.083, acc=0.738, loss=53.624, backward_time=1.040, grad_norm=118.665, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.919e-05, train_time=9.271
+[gpub001:0/64] 2023-07-15 12:44:26,138 (trainer:732) INFO: 53epoch:train:6701-6800batch: iter_time=1.118e-04, forward_time=0.145, loss_ctc=72.189, loss_att=55.718, acc=0.714, loss=60.660, backward_time=1.030, grad_norm=123.601, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.919e-05, train_time=2.745
+[gpub001:0/64] 2023-07-15 12:46:42,970 (trainer:732) INFO: 53epoch:train:6801-6900batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=74.928, loss_att=53.819, acc=0.718, loss=60.152, backward_time=1.032, grad_norm=125.290, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.918e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 12:48:58,893 (trainer:732) INFO: 53epoch:train:6901-7000batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=63.121, loss_att=45.095, acc=0.738, loss=50.503, backward_time=1.027, grad_norm=109.171, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.918e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 12:51:14,725 (trainer:732) INFO: 53epoch:train:7001-7100batch: iter_time=1.081e-04, forward_time=0.145, loss_ctc=77.407, loss_att=58.128, acc=0.716, loss=63.911, backward_time=1.029, grad_norm=119.319, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.917e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:53:30,552 (trainer:732) INFO: 53epoch:train:7101-7200batch: iter_time=1.094e-04, forward_time=0.147, loss_ctc=64.425, loss_att=47.865, acc=0.737, loss=52.833, backward_time=1.028, grad_norm=136.329, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.917e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:55:46,313 (trainer:732) INFO: 53epoch:train:7201-7300batch: iter_time=1.091e-04, forward_time=0.147, loss_ctc=64.912, loss_att=45.342, acc=0.743, loss=51.213, backward_time=1.029, grad_norm=110.998, clip=100.000, loss_scale=2.499e+32, optim_step_time=0.182, optim0_lr0=4.916e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 12:58:02,139 (trainer:732) INFO: 53epoch:train:7301-7400batch: iter_time=1.144e-04, forward_time=0.147, loss_ctc=70.223, loss_att=50.001, acc=0.723, loss=56.068, backward_time=1.028, grad_norm=128.640, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.916e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 13:00:18,214 (trainer:732) INFO: 53epoch:train:7401-7500batch: iter_time=1.159e-04, forward_time=0.148, loss_ctc=62.811, loss_att=47.105, acc=0.732, loss=51.817, backward_time=1.030, grad_norm=123.147, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.915e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 13:00:22,641 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-15 13:00:40,613 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:00:44,325 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:00:44,325 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 13:00:44,332 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:06:53,659 (trainer:732) INFO: 53epoch:train:7501-7600batch: iter_time=1.682, forward_time=0.153, loss_ctc=71.689, loss_att=50.993, acc=0.731, loss=57.202, backward_time=1.056, grad_norm=110.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.915e-05, train_time=7.909
+[gpub001:0/64] 2023-07-15 13:09:10,347 (trainer:732) INFO: 53epoch:train:7601-7700batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=75.829, loss_att=57.328, acc=0.712, loss=62.878, backward_time=1.028, grad_norm=137.383, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.914e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 13:11:26,829 (trainer:732) INFO: 53epoch:train:7701-7800batch: iter_time=1.304e-04, forward_time=0.148, loss_ctc=65.217, loss_att=45.189, acc=0.736, loss=51.198, backward_time=1.029, grad_norm=122.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.914e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 13:13:42,554 (trainer:732) INFO: 53epoch:train:7801-7900batch: iter_time=1.287e-04, forward_time=0.146, loss_ctc=74.146, loss_att=54.219, acc=0.730, loss=60.197, backward_time=1.028, grad_norm=119.101, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.913e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 13:15:58,492 (trainer:732) INFO: 53epoch:train:7901-8000batch: iter_time=1.298e-04, forward_time=0.147, loss_ctc=64.820, loss_att=50.432, acc=0.736, loss=54.748, backward_time=1.029, grad_norm=136.726, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.913e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 13:18:14,196 (trainer:732) INFO: 53epoch:train:8001-8100batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=65.159, loss_att=46.648, acc=0.739, loss=52.201, backward_time=1.027, grad_norm=129.032, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.912e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 13:20:30,190 (trainer:732) INFO: 53epoch:train:8101-8200batch: iter_time=1.194e-04, forward_time=0.148, loss_ctc=67.450, loss_att=49.032, acc=0.722, loss=54.557, backward_time=1.029, grad_norm=149.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.912e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 13:22:47,766 (trainer:732) INFO: 53epoch:train:8201-8300batch: iter_time=1.230e-04, forward_time=0.147, loss_ctc=69.780, loss_att=53.305, acc=0.725, loss=58.248, backward_time=1.029, grad_norm=135.432, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.911e-05, train_time=2.751
+[gpub001:0/64] 2023-07-15 13:23:44,787 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 13:24:02,966 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:24:06,390 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:24:06,390 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 13:24:06,396 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:30:26,163 (trainer:732) INFO: 53epoch:train:8301-8400batch: iter_time=2.133, forward_time=0.174, loss_ctc=68.152, loss_att=51.251, acc=0.719, loss=56.321, backward_time=1.042, grad_norm=127.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.911e-05, train_time=9.168
+[gpub001:0/64] 2023-07-15 13:32:43,842 (trainer:732) INFO: 53epoch:train:8401-8500batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=75.378, loss_att=53.951, acc=0.719, loss=60.379, backward_time=1.030, grad_norm=120.972, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.753
+[gpub001:0/64] 2023-07-15 13:35:00,654 (trainer:732) INFO: 53epoch:train:8501-8600batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=63.746, loss_att=45.123, acc=0.736, loss=50.710, backward_time=1.027, grad_norm=118.053, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 13:37:16,881 (trainer:732) INFO: 53epoch:train:8601-8700batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=72.822, loss_att=55.363, acc=0.722, loss=60.601, backward_time=1.029, grad_norm=139.587, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 13:39:32,720 (trainer:732) INFO: 53epoch:train:8701-8800batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=62.997, loss_att=46.111, acc=0.740, loss=51.177, backward_time=1.028, grad_norm=115.444, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.909e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 13:41:48,463 (trainer:732) INFO: 53epoch:train:8801-8900batch: iter_time=1.169e-04, forward_time=0.147, loss_ctc=66.723, loss_att=46.736, acc=0.734, loss=52.732, backward_time=1.028, grad_norm=102.729, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.909e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 13:44:04,390 (trainer:732) INFO: 53epoch:train:8901-9000batch: iter_time=1.211e-04, forward_time=0.147, loss_ctc=65.966, loss_att=49.188, acc=0.724, loss=54.222, backward_time=1.029, grad_norm=110.737, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.908e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 13:46:20,504 (trainer:732) INFO: 53epoch:train:9001-9100batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=67.534, loss_att=51.762, acc=0.723, loss=56.493, backward_time=1.030, grad_norm=127.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.908e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 13:47:59,767 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 13:48:18,017 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:48:21,456 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:48:21,456 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-15 13:48:21,462 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:52:48,491 (trainer:732) INFO: 53epoch:train:9101-9200batch: iter_time=1.496, forward_time=0.181, loss_ctc=70.548, loss_att=52.593, acc=0.730, loss=57.980, backward_time=1.037, grad_norm=137.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.907e-05, train_time=7.758
+[gpub001:0/64] 2023-07-15 13:55:08,582 (trainer:732) INFO: 53epoch:train:9201-9300batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=71.911, loss_att=54.358, acc=0.714, loss=59.624, backward_time=1.034, grad_norm=117.736, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.907e-05, train_time=2.803
+[gpub001:0/64] 2023-07-15 13:57:24,812 (trainer:732) INFO: 53epoch:train:9301-9400batch: iter_time=1.046e-04, forward_time=0.144, loss_ctc=75.774, loss_att=53.635, acc=0.711, loss=60.277, backward_time=1.026, grad_norm=116.262, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.906e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 13:59:47,479 (trainer:732) INFO: 53epoch:train:9401-9500batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=63.534, loss_att=44.227, acc=0.735, loss=50.019, backward_time=1.037, grad_norm=112.332, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.906e-05, train_time=2.853
+[gpub001:0/64] 2023-07-15 14:02:07,901 (trainer:732) INFO: 53epoch:train:9501-9600batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=75.989, loss_att=57.625, acc=0.716, loss=63.134, backward_time=1.035, grad_norm=128.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.905e-05, train_time=2.808
+[gpub001:0/64] 2023-07-15 14:04:30,560 (trainer:732) INFO: 53epoch:train:9601-9700batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.849, loss_att=47.746, acc=0.733, loss=52.877, backward_time=1.032, grad_norm=145.957, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.905e-05, train_time=2.853
+[gpub001:0/64] 2023-07-15 14:06:49,135 (trainer:732) INFO: 53epoch:train:9701-9800batch: iter_time=1.010e-04, forward_time=0.145, loss_ctc=64.302, loss_att=46.018, acc=0.737, loss=51.503, backward_time=1.028, grad_norm=115.494, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.904e-05, train_time=2.771
+[gpub001:0/64] 2023-07-15 14:09:06,265 (trainer:732) INFO: 53epoch:train:9801-9900batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=70.320, loss_att=50.257, acc=0.719, loss=56.276, backward_time=1.032, grad_norm=123.049, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.904e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 14:11:24,322 (trainer:732) INFO: 53epoch:train:9901-10000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=62.892, loss_att=47.066, acc=0.722, loss=51.814, backward_time=1.028, grad_norm=133.856, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.903e-05, train_time=2.761
+[gpub001:0/64] 2023-07-15 14:25:24,779 (trainer:338) INFO: 53epoch results: [train] iter_time=0.246, forward_time=0.155, loss_ctc=69.208, loss_att=50.743, acc=0.723, loss=56.282, backward_time=1.033, grad_norm=130.642, clip=100.000, loss_scale=2.290e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=3.453, time=4 hours, 48 minutes and 11.54 seconds, total_count=500000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=43.074, cer_ctc=0.248, loss_att=38.316, acc=0.682, cer=0.391, wer=0.994, loss=39.744, time=7 minutes and 22.59 seconds, total_count=51106, gpu_max_cached_mem_GB=37.635, [att_plot] time=6 minutes and 14.16 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 14:25:40,654 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 14:25:40,674 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/48epoch.pth
+[gpub001:0/64] 2023-07-15 14:25:40,674 (trainer:272) INFO: 54/60epoch started. Estimated time to finish: 1 day, 10 hours and 52 minutes
+[gpub001:0/64] 2023-07-15 14:25:40,788 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-15 14:25:59,055 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 14:26:03,030 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 14:26:03,030 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 14:26:03,051 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 14:32:22,669 (trainer:732) INFO: 54epoch:train:1-100batch: iter_time=2.563, forward_time=0.175, loss_ctc=63.502, loss_att=48.578, acc=0.708, loss=53.055, backward_time=1.049, grad_norm=140.341, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=4.903e-05, train_time=8.037
+[gpub001:0/64] 2023-07-15 14:34:38,802 (trainer:732) INFO: 54epoch:train:101-200batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=62.392, loss_att=47.095, acc=0.700, loss=51.684, backward_time=1.029, grad_norm=120.836, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.723
+[gpub001:0/64] 2023-07-15 14:36:54,626 (trainer:732) INFO: 54epoch:train:201-300batch: iter_time=9.869e-05, forward_time=0.145, loss_ctc=72.046, loss_att=53.141, acc=0.703, loss=58.812, backward_time=1.028, grad_norm=147.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 14:39:10,400 (trainer:732) INFO: 54epoch:train:301-400batch: iter_time=1.070e-04, forward_time=0.144, loss_ctc=68.492, loss_att=54.163, acc=0.695, loss=58.462, backward_time=1.028, grad_norm=132.331, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 14:41:40,557 (trainer:732) INFO: 54epoch:train:401-500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=74.081, loss_att=54.631, acc=0.712, loss=60.466, backward_time=1.039, grad_norm=142.817, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.901e-05, train_time=3.003
+[gpub001:0/64] 2023-07-15 14:44:08,475 (trainer:732) INFO: 54epoch:train:501-600batch: iter_time=1.074e-04, forward_time=0.146, loss_ctc=75.789, loss_att=56.471, acc=0.714, loss=62.266, backward_time=1.041, grad_norm=129.097, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.901e-05, train_time=2.958
+[gpub001:0/64] 2023-07-15 14:46:25,669 (trainer:732) INFO: 54epoch:train:601-700batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=80.391, loss_att=63.078, acc=0.698, loss=68.272, backward_time=1.030, grad_norm=144.568, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.900e-05, train_time=2.744
+[gpub001:0/64] 2023-07-15 14:48:45,282 (trainer:732) INFO: 54epoch:train:701-800batch: iter_time=9.712e-05, forward_time=0.144, loss_ctc=76.898, loss_att=54.873, acc=0.711, loss=61.480, backward_time=1.034, grad_norm=129.027, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.900e-05, train_time=2.792
+[gpub001:0/64] 2023-07-15 14:49:40,812 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 14:49:58,541 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 14:50:02,149 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 14:50:02,149 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 14:50:02,155 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 14:55:15,387 (trainer:732) INFO: 54epoch:train:801-900batch: iter_time=1.314, forward_time=0.146, loss_ctc=67.481, loss_att=53.521, acc=0.714, loss=57.709, backward_time=1.050, grad_norm=123.737, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.899e-05, train_time=7.802
+[gpub001:0/64] 2023-07-15 14:57:33,121 (trainer:732) INFO: 54epoch:train:901-1000batch: iter_time=1.229e-04, forward_time=0.147, loss_ctc=61.392, loss_att=45.351, acc=0.703, loss=50.163, backward_time=1.028, grad_norm=131.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.899e-05, train_time=2.754
+[gpub001:0/64] 2023-07-15 14:59:32,594 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 14:59:48,916 (trainer:732) INFO: 54epoch:train:1001-1100batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=65.249, loss_att=46.994, acc=0.724, loss=52.470, backward_time=1.028, grad_norm=120.404, clip=100.000, loss_scale=3.047e+32, optim_step_time=0.182, optim0_lr0=4.898e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 15:02:05,028 (trainer:732) INFO: 54epoch:train:1101-1200batch: iter_time=1.215e-04, forward_time=0.147, loss_ctc=68.386, loss_att=53.369, acc=0.710, loss=57.874, backward_time=1.030, grad_norm=121.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.898e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 15:04:21,637 (trainer:732) INFO: 54epoch:train:1201-1300batch: iter_time=1.120e-04, forward_time=0.148, loss_ctc=72.716, loss_att=55.502, acc=0.716, loss=60.666, backward_time=1.033, grad_norm=135.679, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.897e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 15:06:37,386 (trainer:732) INFO: 54epoch:train:1301-1400batch: iter_time=1.122e-04, forward_time=0.147, loss_ctc=70.008, loss_att=51.346, acc=0.719, loss=56.945, backward_time=1.029, grad_norm=128.324, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.897e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 15:08:54,748 (trainer:732) INFO: 54epoch:train:1401-1500batch: iter_time=1.246e-04, forward_time=0.149, loss_ctc=83.968, loss_att=69.519, acc=0.699, loss=73.854, backward_time=1.032, grad_norm=138.800, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.896e-05, train_time=2.747
+[gpub001:0/64] 2023-07-15 15:11:10,946 (trainer:732) INFO: 54epoch:train:1501-1600batch: iter_time=1.227e-04, forward_time=0.148, loss_ctc=72.417, loss_att=52.717, acc=0.726, loss=58.627, backward_time=1.031, grad_norm=125.644, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.896e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 15:12:42,398 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 15:13:00,574 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 15:13:04,259 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 15:13:04,259 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub001:0/64] 2023-07-15 15:13:04,265 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 15:17:27,833 (trainer:732) INFO: 54epoch:train:1601-1700batch: iter_time=1.373, forward_time=0.166, loss_ctc=71.162, loss_att=54.576, acc=0.714, loss=59.552, backward_time=1.038, grad_norm=130.731, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.895e-05, train_time=7.537 +[gpub001:0/64] 2023-07-15 15:19:44,876 (trainer:732) INFO: 54epoch:train:1701-1800batch: iter_time=1.002e-04, forward_time=0.146, loss_ctc=57.674, loss_att=42.891, acc=0.710, loss=47.326, backward_time=1.033, grad_norm=146.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.895e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 15:22:00,770 (trainer:732) INFO: 54epoch:train:1801-1900batch: iter_time=1.005e-04, forward_time=0.145, loss_ctc=69.982, loss_att=52.765, acc=0.709, loss=57.930, backward_time=1.029, grad_norm=110.570, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 15:24:16,473 (trainer:732) INFO: 54epoch:train:1901-2000batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=68.795, loss_att=50.053, acc=0.721, loss=55.676, backward_time=1.027, grad_norm=128.134, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 15:26:32,120 (trainer:732) INFO: 54epoch:train:2001-2100batch: iter_time=1.016e-04, forward_time=0.144, loss_ctc=71.032, loss_att=54.634, acc=0.702, loss=59.553, backward_time=1.026, grad_norm=146.644, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 15:28:47,771 (trainer:732) INFO: 54epoch:train:2101-2200batch: iter_time=1.116e-04, forward_time=0.144, loss_ctc=69.011, loss_att=51.902, acc=0.714, loss=57.035, backward_time=1.027, grad_norm=146.707, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.893e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 15:31:09,057 (trainer:732) INFO: 54epoch:train:2201-2300batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=77.368, loss_att=56.839, acc=0.720, loss=62.997, backward_time=1.037, grad_norm=146.981, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.893e-05, train_time=2.826 +[gpub001:0/64] 2023-07-15 15:33:27,573 (trainer:732) INFO: 54epoch:train:2301-2400batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=72.933, loss_att=60.662, acc=0.700, loss=64.343, backward_time=1.031, grad_norm=140.913, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=4.892e-05, train_time=2.770 +[gpub001:0/64] 2023-07-15 15:36:01,415 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub001:0/64] 2023-07-15 15:36:19,553 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 15:36:23,193 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 15:36:23,194 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 15:36:23,200 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 15:39:55,199 (trainer:732) INFO: 54epoch:train:2401-2500batch: iter_time=2.445, forward_time=0.145, loss_ctc=74.369, loss_att=54.412, acc=0.716, loss=60.399, backward_time=1.038, grad_norm=132.718, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.892e-05, train_time=7.752 +[gpub001:0/64] 2023-07-15 15:42:12,710 (trainer:732) INFO: 54epoch:train:2501-2600batch: iter_time=1.524e-04, forward_time=0.147, loss_ctc=59.869, loss_att=46.039, acc=0.696, loss=50.188, backward_time=1.033, grad_norm=131.162, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.891e-05, train_time=2.750 +[gpub001:0/64] 2023-07-15 15:44:29,535 (trainer:732) INFO: 54epoch:train:2601-2700batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=65.703, loss_att=49.009, acc=0.712, loss=54.017, backward_time=1.025, grad_norm=128.350, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.891e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 15:46:45,197 (trainer:732) INFO: 54epoch:train:2701-2800batch: iter_time=1.495e-04, forward_time=0.147, loss_ctc=68.702, loss_att=53.287, acc=0.707, loss=57.911, backward_time=1.027, grad_norm=154.384, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.890e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 15:49:01,013 (trainer:732) INFO: 54epoch:train:2801-2900batch: iter_time=1.303e-04, forward_time=0.147, loss_ctc=73.798, loss_att=55.447, acc=0.706, loss=60.952, backward_time=1.029, grad_norm=127.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.890e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 15:49:11,705 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
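The WARNING just above is the trainer's mixed-precision guard: when the unscaled gradient norm comes back non-finite, the optimizer step is skipped and the AMP loss scaler reacts. The loss_scale column behaves accordingly, dropping from ~1.6e+32 to ~8.1e+31 (roughly halved) in the entries that follow and growing again later. A minimal sketch of that skip-on-NaN pattern, modeled on torch.cuda.amp.GradScaler semantics rather than ESPnet's exact trainer code (model, batch, and train_step are placeholder names):

    import torch

    def train_step(model, batch, optimizer, scaler, max_norm=100.0):
        # Hedged sketch, not ESPnet's implementation.
        optimizer.zero_grad()
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            loss = model(**batch)          # combined CTC/attention loss
        scaler.scale(loss).backward()      # backward on the scaled loss
        scaler.unscale_(optimizer)         # gradients back in real units
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        if not torch.isfinite(grad_norm):
            # "The grad norm is nan. Skipping updating the model."
            pass                           # no optimizer step this batch
        else:
            scaler.step(optimizer)         # normal update
        scaler.update()  # halves loss_scale after a bad step, regrows it later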
+[gpub001:0/64] 2023-07-15 15:51:16,520 (trainer:732) INFO: 54epoch:train:2901-3000batch: iter_time=1.349e-04, forward_time=0.146, loss_ctc=67.917, loss_att=49.080, acc=0.724, loss=54.731, backward_time=1.029, grad_norm=136.985, clip=100.000, loss_scale=8.610e+31, optim_step_time=0.182, optim0_lr0=4.889e-05, train_time=2.710
+[gpub001:0/64] 2023-07-15 15:53:32,664 (trainer:732) INFO: 54epoch:train:3001-3100batch: iter_time=1.471e-04, forward_time=0.148, loss_ctc=81.457, loss_att=61.670, acc=0.702, loss=67.606, backward_time=1.030, grad_norm=170.754, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.889e-05, train_time=2.723
+[gpub001:0/64] 2023-07-15 15:55:52,060 (trainer:732) INFO: 54epoch:train:3101-3200batch: iter_time=1.276e-04, forward_time=0.147, loss_ctc=74.030, loss_att=59.359, acc=0.705, loss=63.760, backward_time=1.030, grad_norm=138.690, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.888e-05, train_time=2.788
+[gpub001:0/64] 2023-07-15 15:58:14,921 (trainer:732) INFO: 54epoch:train:3201-3300batch: iter_time=1.442e-04, forward_time=0.146, loss_ctc=71.516, loss_att=52.026, acc=0.713, loss=57.873, backward_time=1.035, grad_norm=134.585, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.888e-05, train_time=2.857
+[gpub001:0/64] 2023-07-15 15:59:06,473 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 15:59:24,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 15:59:28,036 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 15:59:28,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 15:59:28,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 16:04:12,014 (trainer:732) INFO: 54epoch:train:3301-3400batch: iter_time=1.395, forward_time=0.180, loss_ctc=67.158, loss_att=49.393, acc=0.713, loss=54.722, backward_time=1.042, grad_norm=131.945, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=7.141
+[gpub001:0/64] 2023-07-15 16:06:28,728 (trainer:732) INFO: 54epoch:train:3401-3500batch: iter_time=9.250e-05, forward_time=0.146, loss_ctc=65.344, loss_att=46.346, acc=0.721, loss=52.046, backward_time=1.028, grad_norm=126.382, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 16:08:45,609 (trainer:732) INFO: 54epoch:train:3501-3600batch: iter_time=9.346e-05, forward_time=0.147, loss_ctc=69.324, loss_att=54.156, acc=0.716, loss=58.706, backward_time=1.030, grad_norm=127.329, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=2.737
+[gpub001:0/64] 2023-07-15 16:11:02,343 (trainer:732) INFO: 54epoch:train:3601-3700batch: iter_time=9.335e-05, forward_time=0.146, loss_ctc=68.249, loss_att=52.531, acc=0.713, loss=57.246, backward_time=1.034, grad_norm=131.595, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.886e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 16:13:18,306 (trainer:732) INFO: 54epoch:train:3701-3800batch: iter_time=9.645e-05, forward_time=0.146, loss_ctc=67.231, loss_att=51.222, acc=0.716, loss=56.025, backward_time=1.030, grad_norm=145.759, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.886e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 16:15:35,928 (trainer:732) INFO: 54epoch:train:3801-3900batch: iter_time=9.782e-05, forward_time=0.146, loss_ctc=76.405, loss_att=57.298, acc=0.718, loss=63.030, backward_time=1.030, grad_norm=118.733, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.885e-05, train_time=2.752
+[gpub001:0/64] 2023-07-15 16:17:53,102 (trainer:732) INFO: 54epoch:train:3901-4000batch: iter_time=9.738e-05, forward_time=0.146, loss_ctc=77.861, loss_att=59.602, acc=0.715, loss=65.080, backward_time=1.032, grad_norm=143.778, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.885e-05, train_time=2.743
+[gpub001:0/64] 2023-07-15 16:20:12,772 (trainer:732) INFO: 54epoch:train:4001-4100batch: iter_time=9.171e-05, forward_time=0.146, loss_ctc=71.078, loss_att=52.212, acc=0.724, loss=57.872, backward_time=1.031, grad_norm=122.391, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.884e-05, train_time=2.793
+[gpub001:0/64] 2023-07-15 16:21:56,724 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 16:22:14,655 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 16:22:18,028 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 16:22:18,028 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-15 16:22:18,035 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 16:29:57,478 (trainer:732) INFO: 54epoch:train:4101-4200batch: iter_time=4.421, forward_time=0.186, loss_ctc=72.658, loss_att=54.839, acc=0.713, loss=60.185, backward_time=1.041, grad_norm=113.223, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=4.884e-05, train_time=11.694
+[gpub001:0/64] 2023-07-15 16:32:14,169 (trainer:732) INFO: 54epoch:train:4201-4300batch: iter_time=1.315e-04, forward_time=0.149, loss_ctc=62.009, loss_att=45.248, acc=0.711, loss=50.276, backward_time=1.028, grad_norm=132.170, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.883e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 16:34:31,503 (trainer:732) INFO: 54epoch:train:4301-4400batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.740, loss_att=51.311, acc=0.716, loss=56.540, backward_time=1.028, grad_norm=148.146, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=4.883e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 16:36:47,096 (trainer:732) INFO: 54epoch:train:4401-4500batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=64.509, loss_att=49.340, acc=0.715, loss=53.891, backward_time=1.026, grad_norm=142.094, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.882e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 16:39:14,472 (trainer:732) INFO: 54epoch:train:4501-4600batch: iter_time=5.487e-04, forward_time=0.188, loss_ctc=71.996, loss_att=52.884, acc=0.721, loss=58.617, backward_time=1.057, grad_norm=114.514, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.191, optim0_lr0=4.882e-05, train_time=2.945
+[gpub001:0/64] 2023-07-15 16:41:40,016 (trainer:732) INFO: 54epoch:train:4601-4700batch: iter_time=1.099e-04, forward_time=0.216, loss_ctc=69.267, loss_att=53.635, acc=0.719, loss=58.325, backward_time=1.040, grad_norm=162.003, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=4.881e-05, train_time=2.913
+[gpub001:0/64] 2023-07-15 16:43:56,617 (trainer:732) INFO: 54epoch:train:4701-4800batch: iter_time=1.111e-04, forward_time=0.147, loss_ctc=79.072, loss_att=61.795, acc=0.708, loss=66.978, backward_time=1.032, grad_norm=130.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.881e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 16:46:13,780 (trainer:732) INFO: 54epoch:train:4801-4900batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=75.323, loss_att=51.827, acc=0.719, loss=58.876, backward_time=1.030, grad_norm=137.087, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=2.743
+[gpub001:0/64] 2023-07-15 16:48:31,776 (trainer:732) INFO: 54epoch:train:4901-5000batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=74.927, loss_att=57.150, acc=0.708, loss=62.483, backward_time=1.033, grad_norm=132.363, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=2.760
+[gpub001:0/64] 2023-07-15 16:48:53,054 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
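The recurring "Building Nth iter-factory..." lines mark shard boundaries: the training set was pre-split into 12 shards (splits12), and each shard gets its own dataset, batch sampler, and loader. Building one is expensive, which is why the first 100-batch window after each build logs a large iter_time (e.g. 1.373, 2.445, 4.421 above) and a correspondingly inflated train_time. A hypothetical sketch of that per-shard loop (placeholder names, not ESPnet's actual multiple_iter_factory code):

    def epoch_batches(build_iter_factory, shard_ids):
        # shard_ids follow the epoch's shuffled order, e.g. [2, 8, 9, 10, ...]
        for shard in shard_ids:
            loader = build_iter_factory(shard)  # "Building Nth iter-factory..."
            yield from loader                   # first batches pay the build cost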
+[gpub001:0/64] 2023-07-15 16:49:11,204 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 16:49:14,657 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 16:49:14,657 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 16:49:14,723 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 16:55:54,724 (trainer:732) INFO: 54epoch:train:5001-5100batch: iter_time=2.962, forward_time=0.147, loss_ctc=58.843, loss_att=46.145, acc=0.708, loss=49.955, backward_time=1.045, grad_norm=115.373, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=8.859
+[gpub001:0/64] 2023-07-15 16:58:11,635 (trainer:732) INFO: 54epoch:train:5101-5200batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=66.317, loss_att=46.348, acc=0.724, loss=52.339, backward_time=1.031, grad_norm=131.819, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.879e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 17:00:27,591 (trainer:732) INFO: 54epoch:train:5201-5300batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=67.278, loss_att=51.549, acc=0.714, loss=56.268, backward_time=1.029, grad_norm=135.720, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.879e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 17:02:43,684 (trainer:732) INFO: 54epoch:train:5301-5400batch: iter_time=1.163e-04, forward_time=0.147, loss_ctc=73.708, loss_att=57.069, acc=0.716, loss=62.061, backward_time=1.030, grad_norm=153.196, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.878e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 17:05:10,383 (trainer:732) INFO: 54epoch:train:5401-5500batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=68.066, loss_att=48.978, acc=0.724, loss=54.704, backward_time=1.038, grad_norm=142.518, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.878e-05, train_time=2.934
+[gpub001:0/64] 2023-07-15 17:07:31,542 (trainer:732) INFO: 54epoch:train:5501-5600batch: iter_time=1.149e-04, forward_time=0.148, loss_ctc=79.121, loss_att=60.754, acc=0.714, loss=66.264, backward_time=1.037, grad_norm=156.564, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.877e-05, train_time=2.823
+[gpub001:0/64] 2023-07-15 17:09:48,129 (trainer:732) INFO: 54epoch:train:5601-5700batch: iter_time=1.147e-04, forward_time=0.148, loss_ctc=71.339, loss_att=55.929, acc=0.723, loss=60.552, backward_time=1.033, grad_norm=154.635, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.877e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 17:12:04,010 (trainer:732) INFO: 54epoch:train:5701-5800batch: iter_time=1.086e-04, forward_time=0.147, loss_ctc=72.126, loss_att=54.273, acc=0.721, loss=59.629, backward_time=1.028, grad_norm=132.905, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.876e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 17:12:54,371 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 17:13:12,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 17:13:15,973 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 17:13:15,973 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 17:13:15,979 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 17:18:33,665 (trainer:732) INFO: 54epoch:train:5801-5900batch: iter_time=1.397, forward_time=0.233, loss_ctc=70.879, loss_att=52.656, acc=0.723, loss=58.123, backward_time=1.073, grad_norm=140.446, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.184, optim0_lr0=4.876e-05, train_time=7.793
+[gpub001:0/64] 2023-07-15 17:21:02,892 (trainer:732) INFO: 54epoch:train:5901-6000batch: iter_time=1.398e-04, forward_time=0.165, loss_ctc=60.858, loss_att=44.904, acc=0.700, loss=49.690, backward_time=1.040, grad_norm=132.788, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=4.875e-05, train_time=2.984
+[gpub001:0/64] 2023-07-15 17:23:19,553 (trainer:732) INFO: 54epoch:train:6001-6100batch: iter_time=1.350e-04, forward_time=0.145, loss_ctc=64.750, loss_att=48.839, acc=0.720, loss=53.613, backward_time=1.028, grad_norm=136.365, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.875e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 17:25:35,434 (trainer:732) INFO: 54epoch:train:6101-6200batch: iter_time=1.440e-04, forward_time=0.147, loss_ctc=66.947, loss_att=50.736, acc=0.710, loss=55.599, backward_time=1.026, grad_norm=127.831, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=4.874e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 17:27:59,502 (trainer:732) INFO: 54epoch:train:6201-6300batch: iter_time=1.252e-04, forward_time=0.147, loss_ctc=71.889, loss_att=54.562, acc=0.718, loss=59.761, backward_time=1.036, grad_norm=116.556, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.874e-05, train_time=2.881
+[gpub001:0/64] 2023-07-15 17:30:18,402 (trainer:732) INFO: 54epoch:train:6301-6400batch: iter_time=1.250e-04, forward_time=0.147, loss_ctc=66.848, loss_att=48.903, acc=0.725, loss=54.286, backward_time=1.032, grad_norm=118.817, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.874e-05, train_time=2.778
+[gpub001:0/64] 2023-07-15 17:32:34,639 (trainer:732) INFO: 54epoch:train:6401-6500batch: iter_time=1.384e-04, forward_time=0.146, loss_ctc=79.035, loss_att=65.024, acc=0.701, loss=69.227, backward_time=1.030, grad_norm=146.629, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.873e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 17:34:53,111 (trainer:732) INFO: 54epoch:train:6501-6600batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=73.072, loss_att=54.719, acc=0.716, loss=60.225, backward_time=1.030, grad_norm=141.338, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.873e-05, train_time=2.769
+[gpub001:0/64] 2023-07-15 17:36:40,428 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 17:36:58,440 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 17:37:01,910 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 17:37:01,911 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 17:37:01,917 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 17:41:31,451 (trainer:732) INFO: 54epoch:train:6601-6700batch: iter_time=1.482, forward_time=0.207, loss_ctc=70.480, loss_att=52.884, acc=0.716, loss=58.163, backward_time=1.044, grad_norm=119.526, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=4.872e-05, train_time=7.967
+[gpub001:0/64] 2023-07-15 17:43:48,493 (trainer:732) INFO: 54epoch:train:6701-6800batch: iter_time=1.090e-04, forward_time=0.148, loss_ctc=56.961, loss_att=42.537, acc=0.721, loss=46.864, backward_time=1.031, grad_norm=127.255, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.872e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 17:46:04,798 (trainer:732) INFO: 54epoch:train:6801-6900batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=66.029, loss_att=48.816, acc=0.717, loss=53.980, backward_time=1.029, grad_norm=110.226, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.871e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 17:48:20,547 (trainer:732) INFO: 54epoch:train:6901-7000batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=68.985, loss_att=52.724, acc=0.718, loss=57.602, backward_time=1.026, grad_norm=155.820, clip=100.000, loss_scale=1.558e+32, optim_step_time=0.182, optim0_lr0=4.871e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 17:50:36,889 (trainer:732) INFO: 54epoch:train:7001-7100batch: iter_time=1.080e-04, forward_time=0.147, loss_ctc=69.839, loss_att=54.243, acc=0.714, loss=58.922, backward_time=1.029, grad_norm=144.642, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.870e-05, train_time=2.727
+[gpub001:0/64] 2023-07-15 17:52:52,346 (trainer:732) INFO: 54epoch:train:7101-7200batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=67.013, loss_att=51.423, acc=0.715, loss=56.100, backward_time=1.025, grad_norm=131.762, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.870e-05, train_time=2.709
+[gpub001:0/64] 2023-07-15 17:55:09,258 (trainer:732) INFO: 54epoch:train:7201-7300batch: iter_time=1.155e-04, forward_time=0.148, loss_ctc=73.813, loss_att=55.438, acc=0.729, loss=60.950, backward_time=1.031, grad_norm=134.728, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.869e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 17:57:25,341 (trainer:732) INFO: 54epoch:train:7301-7400batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=77.874, loss_att=60.697, acc=0.708, loss=65.850, backward_time=1.030, grad_norm=138.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.869e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 17:59:41,257 (trainer:732) INFO: 54epoch:train:7401-7500batch: iter_time=1.076e-04, forward_time=0.147, loss_ctc=72.240, loss_att=53.009, acc=0.727, loss=58.778, backward_time=1.029, grad_norm=151.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 17:59:45,954 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-15 18:00:04,194 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 18:00:07,635 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 18:00:07,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 18:00:07,736 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 18:05:23,755 (trainer:732) INFO: 54epoch:train:7501-7600batch: iter_time=1.578, forward_time=0.148, loss_ctc=63.070, loss_att=46.075, acc=0.730, loss=51.173, backward_time=1.046, grad_norm=104.077, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=6.850
+[gpub001:0/64] 2023-07-15 18:07:39,960 (trainer:732) INFO: 54epoch:train:7601-7700batch: iter_time=1.174e-04, forward_time=0.147, loss_ctc=61.589, loss_att=43.882, acc=0.723, loss=49.194, backward_time=1.027, grad_norm=131.469, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 18:09:56,626 (trainer:732) INFO: 54epoch:train:7701-7800batch: iter_time=1.127e-04, forward_time=0.149, loss_ctc=67.589, loss_att=49.630, acc=0.721, loss=55.018, backward_time=1.029, grad_norm=119.043, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.867e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 18:12:12,863 (trainer:732) INFO: 54epoch:train:7801-7900batch: iter_time=1.177e-04, forward_time=0.147, loss_ctc=68.700, loss_att=54.220, acc=0.708, loss=58.564, backward_time=1.028, grad_norm=137.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.867e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 18:14:41,639 (trainer:732) INFO: 54epoch:train:7901-8000batch: iter_time=5.809e-04, forward_time=0.239, loss_ctc=69.066, loss_att=52.174, acc=0.726, loss=57.241, backward_time=1.045, grad_norm=143.195, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.187, optim0_lr0=4.866e-05, train_time=2.975
+[gpub001:0/64] 2023-07-15 18:17:01,347 (trainer:732) INFO: 54epoch:train:8001-8100batch: iter_time=1.186e-04, forward_time=0.170, loss_ctc=74.493, loss_att=53.218, acc=0.731, loss=59.601, backward_time=1.032, grad_norm=137.223, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.866e-05, train_time=2.794
+[gpub001:0/64] 2023-07-15 18:19:39,609 (trainer:732) INFO: 54epoch:train:8101-8200batch: iter_time=1.190e-04, forward_time=0.154, loss_ctc=76.741, loss_att=61.285, acc=0.712, loss=65.922, backward_time=1.056, grad_norm=126.126, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.865e-05, train_time=3.165
+[gpub001:0/64] 2023-07-15 18:21:56,191 (trainer:732) INFO: 54epoch:train:8201-8300batch: iter_time=1.161e-04, forward_time=0.148, loss_ctc=74.008, loss_att=52.223, acc=0.727, loss=58.758, backward_time=1.032, grad_norm=115.839, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.865e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 18:23:02,635 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 18:23:20,841 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 18:23:24,297 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 18:23:24,297 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 18:23:24,303 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 18:29:23,956 (trainer:732) INFO: 54epoch:train:8301-8400batch: iter_time=2.345, forward_time=0.153, loss_ctc=64.437, loss_att=50.931, acc=0.730, loss=54.983, backward_time=1.080, grad_norm=113.050, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.864e-05, train_time=8.955
+[gpub001:0/64] 2023-07-15 18:31:53,086 (trainer:732) INFO: 54epoch:train:8401-8500batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=60.709, loss_att=44.071, acc=0.715, loss=49.063, backward_time=1.058, grad_norm=119.273, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.864e-05, train_time=2.982
+[gpub001:0/64] 2023-07-15 18:34:12,345 (trainer:732) INFO: 54epoch:train:8501-8600batch: iter_time=1.015e-04, forward_time=0.146, loss_ctc=63.847, loss_att=44.978, acc=0.734, loss=50.639, backward_time=1.044, grad_norm=106.758, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.863e-05, train_time=2.785
+[gpub001:0/64] 2023-07-15 18:36:33,395 (trainer:732) INFO: 54epoch:train:8601-8700batch: iter_time=1.047e-04, forward_time=0.145, loss_ctc=67.021, loss_att=51.807, acc=0.717, loss=56.371, backward_time=1.036, grad_norm=147.733, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.863e-05, train_time=2.821
+[gpub001:0/64] 2023-07-15 18:38:52,658 (trainer:732) INFO: 54epoch:train:8701-8800batch: iter_time=9.446e-05, forward_time=0.146, loss_ctc=71.996, loss_att=54.637, acc=0.721, loss=59.845, backward_time=1.031, grad_norm=134.907, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.785
+[gpub001:0/64] 2023-07-15 18:41:08,460 (trainer:732) INFO: 54epoch:train:8801-8900batch: iter_time=9.579e-05, forward_time=0.147, loss_ctc=67.011, loss_att=49.373, acc=0.731, loss=54.664, backward_time=1.028, grad_norm=118.447, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 18:43:31,785 (trainer:732) INFO: 54epoch:train:8901-9000batch: iter_time=9.115e-05, forward_time=0.147, loss_ctc=80.349, loss_att=63.204, acc=0.712, loss=68.347, backward_time=1.040, grad_norm=116.323, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.866
+[gpub001:0/64] 2023-07-15 18:45:47,919 (trainer:732) INFO: 54epoch:train:9001-9100batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=72.525, loss_att=52.621, acc=0.728, loss=58.592, backward_time=1.031, grad_norm=131.623, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.861e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 18:47:20,788 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 18:47:38,830 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 18:47:42,516 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 18:47:42,516 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-15 18:47:42,522 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 18:51:38,331 (trainer:732) INFO: 54epoch:train:9101-9200batch: iter_time=1.531, forward_time=0.173, loss_ctc=69.490, loss_att=52.765, acc=0.723, loss=57.782, backward_time=1.037, grad_norm=124.708, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.861e-05, train_time=7.008
+[gpub001:0/64] 2023-07-15 18:53:54,968 (trainer:732) INFO: 54epoch:train:9201-9300batch: iter_time=1.110e-04, forward_time=0.147, loss_ctc=56.277, loss_att=41.943, acc=0.713, loss=46.244, backward_time=1.031, grad_norm=107.239, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.860e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 18:56:11,597 (trainer:732) INFO: 54epoch:train:9301-9400batch: iter_time=1.083e-04, forward_time=0.147, loss_ctc=66.657, loss_att=51.226, acc=0.713, loss=55.856, backward_time=1.029, grad_norm=120.878, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.860e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 18:58:28,213 (trainer:732) INFO: 54epoch:train:9401-9500batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=68.048, loss_att=49.821, acc=0.723, loss=55.289, backward_time=1.032, grad_norm=148.242, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.859e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 19:00:44,029 (trainer:732) INFO: 54epoch:train:9501-9600batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=70.255, loss_att=54.181, acc=0.708, loss=59.003, backward_time=1.028, grad_norm=132.345, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.859e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 19:03:00,419 (trainer:732) INFO: 54epoch:train:9601-9700batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=66.069, loss_att=50.762, acc=0.716, loss=55.354, backward_time=1.028, grad_norm=135.698, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.858e-05, train_time=2.728
+[gpub001:0/64] 2023-07-15 19:05:16,211 (trainer:732) INFO: 54epoch:train:9701-9800batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=76.996, loss_att=58.173, acc=0.719, loss=63.820, backward_time=1.029, grad_norm=132.925, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.858e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 19:07:32,296 (trainer:732) INFO: 54epoch:train:9801-9900batch: iter_time=1.342e-04, forward_time=0.147, loss_ctc=71.876, loss_att=58.022, acc=0.710, loss=62.178, backward_time=1.030, grad_norm=155.420, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.857e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 19:09:47,953 (trainer:732) INFO: 54epoch:train:9901-10000batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=72.341, loss_att=53.585, acc=0.723, loss=59.212, backward_time=1.026, grad_norm=135.675, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.857e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 19:23:03,469 (trainer:338) INFO: 54epoch results: [train] iter_time=0.248, forward_time=0.152, loss_ctc=69.871, loss_att=52.737, acc=0.715, loss=57.878, backward_time=1.034, grad_norm=133.170, clip=100.000, loss_scale=1.474e+32, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=3.409, time=4 hours, 44 minutes and 19.81 seconds, total_count=510000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=41.546, cer_ctc=0.245, loss_att=36.383, acc=0.679, cer=0.423, wer=1.000, loss=37.932, time=7 minutes and 4.7 seconds, total_count=52118, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 58.2 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 19:23:19,345 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 19:23:19,357 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/49epoch.pth
+[gpub001:0/64] 2023-07-15 19:23:19,357 (trainer:272) INFO: 55/60epoch started. Estimated time to finish: 1 day, 5 hours and 52 minutes
+[gpub001:0/64] 2023-07-15 19:23:19,377 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
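The epoch-54 summary above reports loss_ctc=69.871, loss_att=52.737, and loss=57.878, which is consistent with the usual hybrid CTC/attention objective loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3; the weight itself is not printed anywhere in this log, so treat it as an inference from the numbers. A back-of-envelope check:

    ctc_weight = 0.3                      # assumed, not printed in this log
    loss_ctc, loss_att = 69.871, 52.737   # 54epoch [train] averages above
    loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
    print(f"{loss:.3f}")                  # 57.877, vs. the logged loss=57.878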
+[gpub001:0/64] 2023-07-15 19:23:37,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 19:23:40,335 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 19:23:40,335 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 19:23:40,341 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 19:31:02,897 (trainer:732) INFO: 55epoch:train:1-100batch: iter_time=3.212, forward_time=0.179, loss_ctc=66.161, loss_att=47.370, acc=0.712, loss=53.007, backward_time=1.042, grad_norm=114.013, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.856e-05, train_time=9.270
+[gpub001:0/64] 2023-07-15 19:33:19,196 (trainer:732) INFO: 55epoch:train:101-200batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=80.026, loss_att=57.239, acc=0.710, loss=64.075, backward_time=1.030, grad_norm=155.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.856e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 19:35:36,675 (trainer:732) INFO: 55epoch:train:201-300batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=70.409, loss_att=49.481, acc=0.714, loss=55.759, backward_time=1.028, grad_norm=126.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.856e-05, train_time=2.749
+[gpub001:0/64] 2023-07-15 19:37:52,657 (trainer:732) INFO: 55epoch:train:301-400batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=73.324, loss_att=56.085, acc=0.696, loss=61.256, backward_time=1.028, grad_norm=136.348, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.855e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 19:40:11,554 (trainer:732) INFO: 55epoch:train:401-500batch: iter_time=1.186e-04, forward_time=0.145, loss_ctc=68.927, loss_att=51.897, acc=0.701, loss=57.006, backward_time=1.027, grad_norm=132.079, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.855e-05, train_time=2.778
+[gpub001:0/64] 2023-07-15 19:42:27,271 (trainer:732) INFO: 55epoch:train:501-600batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=70.748, loss_att=54.161, acc=0.713, loss=59.137, backward_time=1.026, grad_norm=129.751, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.854e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 19:44:43,089 (trainer:732) INFO: 55epoch:train:601-700batch: iter_time=1.243e-04, forward_time=0.146, loss_ctc=70.260, loss_att=56.700, acc=0.709, loss=60.768, backward_time=1.027, grad_norm=134.754, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.854e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 19:47:02,647 (trainer:732) INFO: 55epoch:train:701-800batch: iter_time=1.169e-04, forward_time=0.159, loss_ctc=61.315, loss_att=47.512, acc=0.708, loss=51.653, backward_time=1.032, grad_norm=119.787, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.853e-05, train_time=2.791
+[gpub001:0/64] 2023-07-15 19:47:56,741 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 19:48:14,565 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 19:48:17,927 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 19:48:17,927 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-15 19:48:17,934 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 19:52:24,523 (trainer:732) INFO: 55epoch:train:801-900batch: iter_time=1.522, forward_time=0.203, loss_ctc=69.155, loss_att=52.160, acc=0.709, loss=57.258, backward_time=1.045, grad_norm=146.382, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.853e-05, train_time=6.437
+[gpub001:0/64] 2023-07-15 19:54:41,572 (trainer:732) INFO: 55epoch:train:901-1000batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=70.035, loss_att=56.263, acc=0.706, loss=60.395, backward_time=1.032, grad_norm=129.876, clip=100.000, loss_scale=3.115e+32, optim_step_time=0.182, optim0_lr0=4.852e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 19:56:57,064 (trainer:732) INFO: 55epoch:train:1001-1100batch: iter_time=1.091e-04, forward_time=0.144, loss_ctc=78.100, loss_att=53.099, acc=0.718, loss=60.599, backward_time=1.025, grad_norm=139.253, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.852e-05, train_time=2.710
+[gpub001:0/64] 2023-07-15 19:59:12,943 (trainer:732) INFO: 55epoch:train:1101-1200batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=74.777, loss_att=55.606, acc=0.702, loss=61.357, backward_time=1.027, grad_norm=126.416, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.851e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 20:01:28,825 (trainer:732) INFO: 55epoch:train:1201-1300batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=69.023, loss_att=53.038, acc=0.705, loss=57.833, backward_time=1.029, grad_norm=128.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.851e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 20:03:45,287 (trainer:732) INFO: 55epoch:train:1301-1400batch: iter_time=1.178e-04, forward_time=0.147, loss_ctc=65.828, loss_att=47.995, acc=0.715, loss=53.345, backward_time=1.029, grad_norm=139.823, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.851e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 20:04:15,026 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 20:06:00,814 (trainer:732) INFO: 55epoch:train:1401-1500batch: iter_time=8.046e-04, forward_time=0.145, loss_ctc=70.081, loss_att=54.160, acc=0.716, loss=58.936, backward_time=1.028, grad_norm=161.967, clip=100.000, loss_scale=1.954e+32, optim_step_time=0.182, optim0_lr0=4.850e-05, train_time=2.710
+[gpub001:0/64] 2023-07-15 20:08:16,706 (trainer:732) INFO: 55epoch:train:1501-1600batch: iter_time=1.300e-04, forward_time=0.148, loss_ctc=65.333, loss_att=50.645, acc=0.717, loss=55.052, backward_time=1.028, grad_norm=125.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.850e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 20:09:58,752 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 20:10:16,629 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 20:10:20,102 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 20:10:20,102 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 20:10:20,108 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 20:15:05,333 (trainer:732) INFO: 55epoch:train:1601-1700batch: iter_time=2.640, forward_time=0.161, loss_ctc=69.772, loss_att=56.145, acc=0.702, loss=60.233, backward_time=1.045, grad_norm=123.498, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.849e-05, train_time=8.172
+[gpub001:0/64] 2023-07-15 20:17:22,257 (trainer:732) INFO: 55epoch:train:1701-1800batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=71.715, loss_att=51.174, acc=0.722, loss=57.336, backward_time=1.032, grad_norm=144.148, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.849e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 20:19:38,257 (trainer:732) INFO: 55epoch:train:1801-1900batch: iter_time=1.163e-04, forward_time=0.146, loss_ctc=71.559, loss_att=54.084, acc=0.725, loss=59.327, backward_time=1.029, grad_norm=122.104, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.848e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 20:21:54,506 (trainer:732) INFO: 55epoch:train:1901-2000batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=72.459, loss_att=49.201, acc=0.735, loss=56.178, backward_time=1.029, grad_norm=122.229, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.848e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 20:24:10,321 (trainer:732) INFO: 55epoch:train:2001-2100batch: iter_time=1.121e-04, forward_time=0.146, loss_ctc=72.859, loss_att=55.435, acc=0.707, loss=60.662, backward_time=1.027, grad_norm=135.154, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.847e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 20:26:25,933 (trainer:732) INFO: 55epoch:train:2101-2200batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=69.278, loss_att=52.189, acc=0.717, loss=57.316, backward_time=1.026, grad_norm=183.531, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.847e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 20:28:41,637 (trainer:732) INFO: 55epoch:train:2201-2300batch: iter_time=1.061e-04, forward_time=0.146, loss_ctc=67.008, loss_att=48.170, acc=0.728, loss=53.821, backward_time=1.027, grad_norm=136.904, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 20:30:57,552 (trainer:732) INFO: 55epoch:train:2301-2400batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=69.173, loss_att=54.867, acc=0.725, loss=59.159, backward_time=1.028, grad_norm=129.937, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 20:33:13,592 (trainer:732) INFO: 55epoch:train:2401-2500batch: iter_time=1.065e-04, forward_time=0.146, loss_ctc=64.746, loss_att=49.739, acc=0.715, loss=54.241, backward_time=1.028, grad_norm=119.142, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 20:33:16,890 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 20:33:34,658 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 20:33:38,071 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 20:33:38,071 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 20:33:38,078 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 20:39:15,364 (trainer:732) INFO: 55epoch:train:2501-2600batch: iter_time=1.324, forward_time=0.155, loss_ctc=75.159, loss_att=52.705, acc=0.723, loss=59.441, backward_time=1.050, grad_norm=172.779, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.845e-05, train_time=7.235
+[gpub001:0/64] 2023-07-15 20:41:32,183 (trainer:732) INFO: 55epoch:train:2601-2700batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=69.915, loss_att=53.588, acc=0.721, loss=58.486, backward_time=1.031, grad_norm=134.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.845e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 20:43:48,482 (trainer:732) INFO: 55epoch:train:2701-2800batch: iter_time=1.217e-04, forward_time=0.147, loss_ctc=76.857, loss_att=50.857, acc=0.733, loss=58.657, backward_time=1.031, grad_norm=125.091, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.844e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 20:46:04,499 (trainer:732) INFO: 55epoch:train:2801-2900batch: iter_time=1.144e-04, forward_time=0.146, loss_ctc=70.084, loss_att=55.592, acc=0.713, loss=59.939, backward_time=1.030, grad_norm=156.149, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.844e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 20:48:20,501 (trainer:732) INFO: 55epoch:train:2901-3000batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=65.972, loss_att=49.663, acc=0.717, loss=54.556, backward_time=1.030, grad_norm=162.432, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.843e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 20:50:45,633 (trainer:732) INFO: 55epoch:train:3001-3100batch: iter_time=1.104e-04, forward_time=0.194, loss_ctc=66.366, loss_att=46.152, acc=0.727, loss=52.216, backward_time=1.050, grad_norm=136.185, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=4.843e-05, train_time=2.901
+[gpub001:0/64] 2023-07-15 20:53:05,403 (trainer:732) INFO: 55epoch:train:3101-3200batch: iter_time=1.162e-04, forward_time=0.173, loss_ctc=72.329, loss_att=57.968, acc=0.722, loss=62.276, backward_time=1.030, grad_norm=129.959, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.842e-05, train_time=2.796
+[gpub001:0/64] 2023-07-15 20:55:21,845 (trainer:732) INFO: 55epoch:train:3201-3300batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=60.730, loss_att=48.110, acc=0.725, loss=51.896, backward_time=1.030, grad_norm=110.140, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.842e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 20:56:24,324 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 20:56:42,220 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 20:56:45,641 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 20:56:45,641 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 20:56:45,671 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 21:01:16,198 (trainer:732) INFO: 55epoch:train:3301-3400batch: iter_time=2.037, forward_time=0.159, loss_ctc=65.648, loss_att=46.081, acc=0.723, loss=51.951, backward_time=1.054, grad_norm=141.567, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.841e-05, train_time=7.087
+[gpub001:0/64] 2023-07-15 21:03:32,649 (trainer:732) INFO: 55epoch:train:3401-3500batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=68.666, loss_att=55.199, acc=0.722, loss=59.239, backward_time=1.030, grad_norm=148.789, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.841e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 21:05:48,957 (trainer:732) INFO: 55epoch:train:3501-3600batch: iter_time=1.139e-04, forward_time=0.147, loss_ctc=75.185, loss_att=50.223, acc=0.733, loss=57.711, backward_time=1.031, grad_norm=142.225, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.841e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 21:08:05,090 (trainer:732) INFO: 55epoch:train:3601-3700batch: iter_time=1.175e-04, forward_time=0.147, loss_ctc=72.633, loss_att=54.354, acc=0.712, loss=59.838, backward_time=1.029, grad_norm=156.626, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.840e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 21:10:21,444 (trainer:732) INFO: 55epoch:train:3701-3800batch: iter_time=1.204e-04, forward_time=0.147, loss_ctc=69.561, loss_att=51.973, acc=0.725, loss=57.249, backward_time=1.030, grad_norm=129.593, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.840e-05, train_time=2.727
+[gpub001:0/64] 2023-07-15 21:12:37,572 (trainer:732) INFO: 55epoch:train:3801-3900batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=64.566, loss_att=47.497, acc=0.722, loss=52.618, backward_time=1.027, grad_norm=133.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.839e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 21:14:53,769 (trainer:732) INFO: 55epoch:train:3901-4000batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=67.393, loss_att=53.835, acc=0.726, loss=57.902, backward_time=1.029, grad_norm=140.927, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.839e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 21:17:09,543 (trainer:732) INFO: 55epoch:train:4001-4100batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=65.438, loss_att=51.648, acc=0.716, loss=55.785, backward_time=1.027, grad_norm=112.494, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.838e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 21:18:46,330 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 21:19:04,591 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 21:19:08,060 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 21:19:08,060 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-15 21:19:08,066 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 21:23:46,402 (trainer:732) INFO: 55epoch:train:4101-4200batch: iter_time=1.571, forward_time=0.171, loss_ctc=69.400, loss_att=56.671, acc=0.707, loss=60.489, backward_time=1.040, grad_norm=139.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.838e-05, train_time=7.937
+[gpub001:0/64] 2023-07-15 21:26:03,353 (trainer:732) INFO: 55epoch:train:4201-4300batch: iter_time=1.202e-04, forward_time=0.146, loss_ctc=71.370, loss_att=51.067, acc=0.719, loss=57.158, backward_time=1.033, grad_norm=124.956, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.837e-05, train_time=2.739
+[gpub001:0/64] 2023-07-15 21:28:19,368 (trainer:732) INFO: 55epoch:train:4301-4400batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.788, loss_att=55.523, acc=0.716, loss=60.102, backward_time=1.030, grad_norm=145.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.837e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 21:30:35,376 (trainer:732) INFO: 55epoch:train:4401-4500batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=73.170, loss_att=49.309, acc=0.727, loss=56.467, backward_time=1.030, grad_norm=177.689, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.836e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 21:32:57,674 (trainer:732) INFO: 55epoch:train:4501-4600batch: iter_time=1.231e-04, forward_time=0.170, loss_ctc=71.775, loss_att=55.086, acc=0.701, loss=60.093, backward_time=1.065, grad_norm=138.671, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.836e-05, train_time=2.845
+[gpub001:0/64] 2023-07-15 21:35:18,977 (trainer:732) INFO: 55epoch:train:4601-4700batch: iter_time=1.292e-04, forward_time=0.163, loss_ctc=68.115, loss_att=52.067, acc=0.705, loss=56.882, backward_time=1.030, grad_norm=146.040, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.836e-05, train_time=2.826
+[gpub001:0/64] 2023-07-15 21:37:37,938 (trainer:732) INFO: 55epoch:train:4701-4800batch: iter_time=1.370e-04, forward_time=0.146, loss_ctc=67.563, loss_att=48.505, acc=0.725, loss=54.222, backward_time=1.041, grad_norm=114.887, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.835e-05, train_time=2.779
+[gpub001:0/64] 2023-07-15 21:39:53,753 (trainer:732) INFO: 55epoch:train:4801-4900batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.293, loss_att=55.110, acc=0.720, loss=59.365, backward_time=1.028, grad_norm=141.733, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.835e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 21:42:09,547 (trainer:732) INFO: 55epoch:train:4901-5000batch: iter_time=1.178e-04, forward_time=0.146, loss_ctc=62.874, loss_att=48.554, acc=0.716, loss=52.850, backward_time=1.029, grad_norm=127.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.834e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 21:42:13,217 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-15 21:42:31,219 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 21:42:34,649 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 21:42:34,649 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 21:42:34,655 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 21:48:26,273 (trainer:732) INFO: 55epoch:train:5001-5100batch: iter_time=1.355, forward_time=0.182, loss_ctc=63.212, loss_att=44.533, acc=0.732, loss=50.136, backward_time=1.043, grad_norm=148.605, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.834e-05, train_time=7.534
+[gpub001:0/64] 2023-07-15 21:50:42,538 (trainer:732) INFO: 55epoch:train:5101-5200batch: iter_time=9.720e-05, forward_time=0.144, loss_ctc=75.917, loss_att=55.962, acc=0.723, loss=61.949, backward_time=1.030, grad_norm=125.059, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.833e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 21:52:58,854 (trainer:732) INFO: 55epoch:train:5201-5300batch: iter_time=8.952e-05, forward_time=0.144, loss_ctc=68.540, loss_att=46.952, acc=0.732, loss=53.429, backward_time=1.030, grad_norm=136.243, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.833e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 21:55:19,419 (trainer:732) INFO: 55epoch:train:5301-5400batch: iter_time=8.648e-05, forward_time=0.144, loss_ctc=72.817, loss_att=54.724, acc=0.714, loss=60.152, backward_time=1.034, grad_norm=142.694, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.832e-05, train_time=2.811
+[gpub001:0/64] 2023-07-15 21:57:36,018 (trainer:732) INFO: 55epoch:train:5401-5500batch: iter_time=9.088e-05, forward_time=0.144, loss_ctc=68.000, loss_att=50.279, acc=0.722, loss=55.595, backward_time=1.030, grad_norm=120.000, clip=100.000, loss_scale=2.888e+32, optim_step_time=0.182, optim0_lr0=4.832e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 21:59:52,599 (trainer:732) INFO: 55epoch:train:5501-5600batch: iter_time=9.922e-05, forward_time=0.145, loss_ctc=70.562, loss_att=53.030, acc=0.727, loss=58.290, backward_time=1.031, grad_norm=151.714, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.831e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:02:08,924 (trainer:732) INFO: 55epoch:train:5601-5700batch: iter_time=1.006e-04, forward_time=0.144, loss_ctc=67.520, loss_att=56.170, acc=0.720, loss=59.575, backward_time=1.030, grad_norm=179.978, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.831e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 22:04:27,557 (trainer:732) INFO: 55epoch:train:5701-5800batch: iter_time=9.311e-05, forward_time=0.144, loss_ctc=60.189, loss_att=46.927, acc=0.714, loss=50.906, backward_time=1.031, grad_norm=118.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.831e-05, train_time=2.772
+[gpub001:0/64] 2023-07-15 22:05:31,121 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 22:05:48,967 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:05:52,430 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:05:52,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 22:05:52,437 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:12:10,087 (trainer:732) INFO: 55epoch:train:5801-5900batch: iter_time=3.061, forward_time=0.185, loss_ctc=75.455, loss_att=58.296, acc=0.718, loss=63.444, backward_time=1.105, grad_norm=123.117, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=4.830e-05, train_time=9.250
+[gpub001:0/64] 2023-07-15 22:14:27,175 (trainer:732) INFO: 55epoch:train:5901-6000batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=68.118, loss_att=46.658, acc=0.734, loss=53.096, backward_time=1.031, grad_norm=177.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.830e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 22:16:43,727 (trainer:732) INFO: 55epoch:train:6001-6100batch: iter_time=1.079e-04, forward_time=0.146, loss_ctc=79.566, loss_att=57.355, acc=0.725, loss=64.019, backward_time=1.031, grad_norm=166.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.829e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:18:59,637 (trainer:732) INFO: 55epoch:train:6101-6200batch: iter_time=1.009e-04, forward_time=0.144, loss_ctc=69.002, loss_att=52.731, acc=0.722, loss=57.612, backward_time=1.029, grad_norm=119.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.829e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 22:21:16,210 (trainer:732) INFO: 55epoch:train:6201-6300batch: iter_time=1.116e-04, forward_time=0.145, loss_ctc=68.288, loss_att=50.084, acc=0.721, loss=55.545, backward_time=1.030, grad_norm=139.701, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.828e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:23:44,539 (trainer:732) INFO: 55epoch:train:6301-6400batch: iter_time=1.056e-04, forward_time=0.144, loss_ctc=67.344, loss_att=48.425, acc=0.728, loss=54.101, backward_time=1.045, grad_norm=133.354, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.828e-05, train_time=2.966
+[gpub001:0/64] 2023-07-15 22:26:07,503 (trainer:732) INFO: 55epoch:train:6401-6500batch: iter_time=1.176e-04, forward_time=0.152, loss_ctc=72.129, loss_att=56.674, acc=0.724, loss=61.311, backward_time=1.051, grad_norm=128.706, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.827e-05, train_time=2.859
+[gpub001:0/64] 2023-07-15 22:28:29,981 (trainer:732) INFO: 55epoch:train:6501-6600batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=62.301, loss_att=49.950, acc=0.726, loss=53.655, backward_time=1.040, grad_norm=125.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.827e-05, train_time=2.849
+[gpub001:0/64] 2023-07-15 22:30:06,709 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 22:30:25,064 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:30:28,526 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:30:28,526 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 22:30:28,532 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:36:04,422 (trainer:732) INFO: 55epoch:train:6601-6700batch: iter_time=1.567, forward_time=0.148, loss_ctc=66.536, loss_att=49.542, acc=0.714, loss=54.640, backward_time=1.031, grad_norm=147.774, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.827e-05, train_time=9.089
+[gpub001:0/64] 2023-07-15 22:38:24,208 (trainer:732) INFO: 55epoch:train:6701-6800batch: iter_time=1.030e-04, forward_time=0.167, loss_ctc=70.272, loss_att=49.751, acc=0.724, loss=55.907, backward_time=1.037, grad_norm=112.465, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.826e-05, train_time=2.796
+[gpub001:0/64] 2023-07-15 22:40:41,823 (trainer:732) INFO: 55epoch:train:6801-6900batch: iter_time=9.641e-05, forward_time=0.146, loss_ctc=70.101, loss_att=53.067, acc=0.720, loss=58.177, backward_time=1.032, grad_norm=122.968, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.826e-05, train_time=2.752
+[gpub001:0/64] 2023-07-15 22:42:59,328 (trainer:732) INFO: 55epoch:train:6901-7000batch: iter_time=9.366e-05, forward_time=0.145, loss_ctc=72.478, loss_att=49.337, acc=0.730, loss=56.279, backward_time=1.029, grad_norm=129.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.825e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 22:45:16,265 (trainer:732) INFO: 55epoch:train:7001-7100batch: iter_time=1.199e-04, forward_time=0.145, loss_ctc=72.021, loss_att=55.230, acc=0.700, loss=60.268, backward_time=1.029, grad_norm=126.971, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.825e-05, train_time=2.739
+[gpub001:0/64] 2023-07-15 22:47:32,473 (trainer:732) INFO: 55epoch:train:7101-7200batch: iter_time=1.102e-04, forward_time=0.145, loss_ctc=67.237, loss_att=51.993, acc=0.709, loss=56.566, backward_time=1.029, grad_norm=119.930, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.824e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 22:49:48,357 (trainer:732) INFO: 55epoch:train:7201-7300batch: iter_time=1.017e-04, forward_time=0.145, loss_ctc=67.345, loss_att=47.923, acc=0.726, loss=53.750, backward_time=1.029, grad_norm=118.761, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.824e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 22:52:04,142 (trainer:732) INFO: 55epoch:train:7301-7400batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=68.462, loss_att=53.569, acc=0.725, loss=58.037, backward_time=1.028, grad_norm=138.314, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.823e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 22:54:24,008 (trainer:732) INFO: 55epoch:train:7401-7500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.726, loss_att=49.352, acc=0.717, loss=53.664, backward_time=1.035, grad_norm=118.056, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.823e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 22:54:29,095 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-15 22:54:47,130 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:54:50,600 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:54:50,600 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 22:54:50,606 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:59:53,940 (trainer:732) INFO: 55epoch:train:7501-7600batch: iter_time=1.342, forward_time=0.179, loss_ctc=75.400, loss_att=51.040, acc=0.730, loss=58.348, backward_time=1.045, grad_norm=136.571, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=6.598
+[gpub001:0/64] 2023-07-15 23:02:10,577 (trainer:732) INFO: 55epoch:train:7601-7700batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=69.202, loss_att=52.650, acc=0.727, loss=57.616, backward_time=1.028, grad_norm=137.867, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 23:04:27,339 (trainer:732) INFO: 55epoch:train:7701-7800batch: iter_time=1.118e-04, forward_time=0.147, loss_ctc=75.729, loss_att=50.295, acc=0.738, loss=57.925, backward_time=1.033, grad_norm=129.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 23:06:43,267 (trainer:732) INFO: 55epoch:train:7801-7900batch: iter_time=1.329e-04, forward_time=0.147, loss_ctc=68.754, loss_att=54.027, acc=0.720, loss=58.445, backward_time=1.030, grad_norm=130.834, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.821e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 23:09:14,681 (trainer:732) INFO: 55epoch:train:7901-8000batch: iter_time=1.080e-04, forward_time=0.146, loss_ctc=65.383, loss_att=49.653, acc=0.720, loss=54.372, backward_time=1.082, grad_norm=143.167, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.821e-05, train_time=3.028
+[gpub001:0/64] 2023-07-15 23:11:49,953 (trainer:732) INFO: 55epoch:train:8001-8100batch: iter_time=1.179e-04, forward_time=0.148, loss_ctc=65.409, loss_att=44.547, acc=0.734, loss=50.806, backward_time=1.053, grad_norm=133.133, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.820e-05, train_time=3.105
+[gpub001:0/64] 2023-07-15 23:14:06,705 (trainer:732) INFO: 55epoch:train:8101-8200batch: iter_time=1.099e-04, forward_time=0.148, loss_ctc=71.658, loss_att=56.942, acc=0.730, loss=61.357, backward_time=1.035, grad_norm=138.515, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.820e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 23:16:22,766 (trainer:732) INFO: 55epoch:train:8201-8300batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=60.074, loss_att=47.491, acc=0.727, loss=51.266, backward_time=1.030, grad_norm=119.828, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.819e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 23:17:21,807 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 23:17:40,310 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 23:17:44,092 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 23:17:44,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 23:17:44,098 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 23:22:06,581 (trainer:732) INFO: 55epoch:train:8301-8400batch: iter_time=1.931, forward_time=0.162, loss_ctc=70.513, loss_att=50.076, acc=0.728, loss=56.207, backward_time=1.053, grad_norm=120.350, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.819e-05, train_time=6.876
+[gpub001:0/64] 2023-07-15 23:24:24,068 (trainer:732) INFO: 55epoch:train:8401-8500batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=66.708, loss_att=47.369, acc=0.724, loss=53.171, backward_time=1.031, grad_norm=116.972, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.818e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 23:26:42,851 (trainer:732) INFO: 55epoch:train:8501-8600batch: iter_time=1.086e-04, forward_time=0.167, loss_ctc=78.244, loss_att=58.054, acc=0.714, loss=64.111, backward_time=1.031, grad_norm=173.126, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.818e-05, train_time=2.775
+[gpub001:0/64] 2023-07-15 23:29:02,728 (trainer:732) INFO: 55epoch:train:8601-8700batch: iter_time=1.328e-04, forward_time=0.155, loss_ctc=69.989, loss_att=54.707, acc=0.715, loss=59.291, backward_time=1.031, grad_norm=149.360, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.818e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 23:31:26,010 (trainer:732) INFO: 55epoch:train:8701-8800batch: iter_time=1.131e-04, forward_time=0.196, loss_ctc=67.734, loss_att=49.675, acc=0.708, loss=55.093, backward_time=1.032, grad_norm=151.352, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.199, optim0_lr0=4.817e-05, train_time=2.865
+[gpub001:0/64] 2023-07-15 23:33:45,012 (trainer:732) INFO: 55epoch:train:8801-8900batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=66.717, loss_att=48.204, acc=0.719, loss=53.758, backward_time=1.032, grad_norm=131.081, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.817e-05, train_time=2.780
+[gpub001:0/64] 2023-07-15 23:36:00,960 (trainer:732) INFO: 55epoch:train:8901-9000batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=70.848, loss_att=55.620, acc=0.725, loss=60.188, backward_time=1.029, grad_norm=143.176, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.816e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 23:38:18,241 (trainer:732) INFO: 55epoch:train:9001-9100batch: iter_time=1.011e-04, forward_time=0.150, loss_ctc=62.371, loss_att=49.098, acc=0.726, loss=53.080, backward_time=1.032, grad_norm=147.200, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.816e-05, train_time=2.745
+[gpub001:0/64] 2023-07-15 23:40:07,256 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 23:40:25,764 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 23:40:29,558 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 23:40:29,558 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 23:40:29,564 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 23:45:21,086 (trainer:732) INFO: 55epoch:train:9101-9200batch: iter_time=2.793, forward_time=0.191, loss_ctc=66.256, loss_att=48.196, acc=0.723, loss=53.614, backward_time=1.044, grad_norm=112.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.815e-05, train_time=8.457
+[gpub001:0/64] 2023-07-15 23:47:43,332 (trainer:732) INFO: 55epoch:train:9201-9300batch: iter_time=9.859e-05, forward_time=0.145, loss_ctc=70.085, loss_att=49.743, acc=0.725, loss=55.846, backward_time=1.042, grad_norm=152.031, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.815e-05, train_time=2.845
+[gpub001:0/64] 2023-07-15 23:50:02,620 (trainer:732) INFO: 55epoch:train:9301-9400batch: iter_time=9.019e-05, forward_time=0.145, loss_ctc=69.933, loss_att=52.729, acc=0.721, loss=57.890, backward_time=1.040, grad_norm=139.705, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.814e-05, train_time=2.786
+[gpub001:0/64] 2023-07-15 23:52:24,700 (trainer:732) INFO: 55epoch:train:9401-9500batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=72.083, loss_att=48.816, acc=0.733, loss=55.796, backward_time=1.032, grad_norm=144.424, clip=100.000, loss_scale=5.776e+32, optim_step_time=0.181, optim0_lr0=4.814e-05, train_time=2.841
+[gpub001:0/64] 2023-07-15 23:54:49,710 (trainer:732) INFO: 55epoch:train:9501-9600batch: iter_time=1.283e-04, forward_time=0.196, loss_ctc=70.036, loss_att=53.272, acc=0.706, loss=58.301, backward_time=1.036, grad_norm=201.540, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=4.814e-05, train_time=2.899
+[gpub001:0/64] 2023-07-15 23:57:14,693 (trainer:732) INFO: 55epoch:train:9601-9700batch: iter_time=1.226e-04, forward_time=0.148, loss_ctc=67.979, loss_att=52.018, acc=0.708, loss=56.807, backward_time=1.042, grad_norm=195.141, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.813e-05, train_time=2.900
+[gpub001:0/64] 2023-07-15 23:59:36,152 (trainer:732) INFO: 55epoch:train:9701-9800batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=68.810, loss_att=49.128, acc=0.729, loss=55.032, backward_time=1.052, grad_norm=143.908, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.813e-05, train_time=2.829
+[gpub001:0/64] 2023-07-16 00:02:00,185 (trainer:732) INFO: 55epoch:train:9801-9900batch: iter_time=1.242e-04, forward_time=0.147, loss_ctc=66.852, loss_att=52.866, acc=0.723, loss=57.062, backward_time=1.040, grad_norm=130.349, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.812e-05, train_time=2.881
+[gpub001:0/64] 2023-07-16 00:04:16,085 (trainer:732) INFO: 55epoch:train:9901-10000batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=62.767, loss_att=47.965, acc=0.721, loss=52.406, backward_time=1.029, grad_norm=113.224, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.812e-05, train_time=2.718
+[gpub001:0/64] 2023-07-16 00:16:37,923 (trainer:338) INFO: 55epoch results: [train] iter_time=0.244, forward_time=0.152, loss_ctc=69.296, loss_att=51.744, acc=0.719, loss=57.010, backward_time=1.035, grad_norm=137.602, clip=100.000, loss_scale=2.636e+32, optim_step_time=0.182, optim0_lr0=4.834e-05, train_time=3.371, time=4 hours, 41 minutes and 10.78 seconds, total_count=520000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.158, cer_ctc=0.245, loss_att=35.877, acc=0.700, cer=0.361, wer=0.989, loss=37.761, time=6 minutes and 6.77 seconds, total_count=53130, gpu_max_cached_mem_GB=37.635, [att_plot] time=6 minutes and 0.96 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-16 00:16:57,201 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-16 00:16:57,394 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/38epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/50epoch.pth
+[gpub001:0/64] 2023-07-16 00:16:57,395 (trainer:272) INFO: 56/60epoch started. Estimated time to finish: 1 day, 49 minutes and 46.32 seconds
+[gpub001:0/64] 2023-07-16 00:16:59,197 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-16 00:17:17,371 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-16 00:17:22,642 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-16 00:17:22,642 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-16 00:17:22,740 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-16 00:25:03,887 (trainer:732) INFO: 56epoch:train:1-100batch: iter_time=3.403, forward_time=0.191, loss_ctc=67.004, loss_att=52.989, acc=0.697, loss=57.193, backward_time=1.047, grad_norm=134.202, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=4.811e-05, train_time=9.711
+[gpub001:0/64] 2023-07-16 00:26:04,353 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-16 00:27:28,104 (trainer:732) INFO: 56epoch:train:101-200batch: iter_time=9.493e-05, forward_time=0.145, loss_ctc=68.220, loss_att=50.035, acc=0.712, loss=55.491, backward_time=1.051, grad_norm=141.648, clip=100.000, loss_scale=4.570e+32, optim_step_time=0.182, optim0_lr0=4.811e-05, train_time=2.884
+[gpub001:0/64] 2023-07-16 00:29:45,320 (trainer:732) INFO: 56epoch:train:201-300batch: iter_time=1.056e-04, forward_time=0.143, loss_ctc=82.452, loss_att=59.724, acc=0.705, loss=66.542, backward_time=1.029, grad_norm=146.750, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.744
+[gpub001:0/64] 2023-07-16 00:32:02,788 (trainer:732) INFO: 56epoch:train:301-400batch: iter_time=9.678e-05, forward_time=0.144, loss_ctc=72.797, loss_att=51.224, acc=0.713, loss=57.695, backward_time=1.028, grad_norm=135.271, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.749
+[gpub001:0/64] 2023-07-16 00:34:19,996 (trainer:732) INFO: 56epoch:train:401-500batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=64.462, loss_att=47.713, acc=0.715, loss=52.738, backward_time=1.028, grad_norm=123.919, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.744
+[gpub001:0/64] 2023-07-16 00:36:48,635 (trainer:732) INFO: 56epoch:train:501-600batch: iter_time=2.047e-04, forward_time=0.231, loss_ctc=68.197, loss_att=52.344, acc=0.718, loss=57.100, backward_time=1.043, grad_norm=143.676, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.809e-05, train_time=2.972
+[gpub001:0/64] 2023-07-16 00:39:11,940 (trainer:732) INFO: 56epoch:train:601-700batch: iter_time=7.600e-04, forward_time=0.198, loss_ctc=77.810, loss_att=56.459, acc=0.705, loss=62.865, backward_time=1.034, grad_norm=136.066, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.809e-05, train_time=2.866
+[gpub001:0/64] 2023-07-16 00:41:30,733 (trainer:732) INFO: 56epoch:train:701-800batch: iter_time=9.654e-05, forward_time=0.146, loss_ctc=68.615, loss_att=51.269, acc=0.712, loss=56.472, backward_time=1.030, grad_norm=132.539, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.808e-05, train_time=2.776
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+slurmstepd: error: *** STEP 2157595.0 ON gpub001 CANCELLED AT 2023-07-16T00:41:51 ***
diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.10.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.10.log
new file mode 100644
index 0000000000000000000000000000000000000000..96816758eecc0dfd3614fbd93317422e486ba465
--- /dev/null
+++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.10.log
@@ -0,0 +1,4404 @@
+# Running on gpua014.delta.ncsa.illinois.edu
+# Started at Mon Jul 3 02:21:56 CDT 2023
+# SLURMD_NODENAME=gpua014
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2118951
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x16)'
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2118951
+# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpua[014-016,018,020-022,041,060,062-063,068,088,091,093,096]'
+# SLURM_JOB_NUM_NODES=16
+# SLURM_JOB_PARTITION=gpuA100x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_NNODES=16
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpua[014-016,018,020-022,041,060,062-063,068,088,091,093,096]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1
+# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu
+# SLURM_TASKS_PER_NODE='1(x16)'
+# SLURM_TASK_PID=1504675
+# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua014
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d
+ibuted true --dist_launcher slurm --dist_init_method
file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d +[gpua014:0/64] 2023-07-03 02:24:21,574 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpua014:0/64] 2023-07-03 02:24:24,018 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. 
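[Editor's note] The two distributed_c10d messages above record torch.distributed's store-based barrier: each of the 64 ranks registers the same key in a shared store, and rank 0 reports completion once all have checked in. A minimal sketch of the file-based rendezvous implied by --dist_init_method file://... follows; it is an illustration, not the ESPnet launcher itself, and the init file path is hypothetical (ranks are assumed to come from the SLURM environment as srun exports them).

import os
import torch
import torch.distributed as dist

def init_distributed(init_file: str) -> None:
    # Derive this process's rank and the total world size from SLURM.
    rank = int(os.environ.get("SLURM_PROCID", "0"))
    world_size = int(os.environ.get("SLURM_NTASKS", "1"))
    # File-based rendezvous: all ranks point at the same file on a shared
    # filesystem, mirroring --dist_init_method file:///...
    dist.init_process_group(
        backend="nccl",
        init_method=f"file://{init_file}",
        rank=rank,
        world_size=world_size,
    )
    # Bind the process to one of the node's GPUs (4 per node in this run).
    torch.cuda.set_device(rank % torch.cuda.device_count())
    # Synchronize all ranks; the internal store-based barrier during init is
    # what produces the "store_based_barrier_key" INFO lines in this log.
    dist.barrier()

init_distributed("/tmp/.dist_init_example")  # hypothetical path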
+[gpua014:0/64] 2023-07-03 02:24:24,051 (s2t:483) INFO: Vocabulary size: 50002 +[gpua014:0/64] 2023-07-03 02:24:39,279 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpua014:0/64] 2023-07-03 02:24:39,288 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpua014:0/64] 2023-07-03 02:24:39,288 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpua014:0/64] 2023-07-03 02:24:39,288 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpua014:0/64] 2023-07-03 02:24:39,290 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpua014:0/64] 2023-07-03 02:24:39,991 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpua014:0/64] 2023-07-03 02:24:48,871 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-07-03 02:24:49,062 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpua014:0/64] 2023-07-03 02:24:49,062 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua014:0/64] 2023-07-03 02:24:49,071 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpua014:0/64] 2023-07-03 02:24:49,550 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-07-03 02:24:49,856 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
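A note on the optimizer block above, since the numbers are easy to misread: the AdamW group prints lr: 2.5e-08 alongside initial_lr: 0.00025 because that is simply the WarmupLR schedule evaluated at the first step. ESPnet2's WarmupLR is the Noam-style schedule, and a minimal sketch in plain Python (illustrative only, not this recipe's code) reproduces both the step-1 value and the peak:

    def warmup_lr(step, initial_lr=2.5e-4, warmup_steps=10000):
        # Noam-style warmup: ramp up to initial_lr over `warmup_steps`,
        # then decay proportionally to 1/sqrt(step).
        return initial_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

    assert abs(warmup_lr(1) - 2.5e-08) < 1e-12      # the "lr: 2.5e-08" printed above
    assert abs(warmup_lr(10000) - 2.5e-04) < 1e-12  # peaks at initial_lr after warmup

The model summary is consistent as well: 888.51 M float32 parameters at 4 bytes each comes to roughly 3.55 GB.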
+[gpua014:0/64] 2023-07-03 02:24:49,856 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua014:0/64] 2023-07-03 02:24:49,856 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpua014:0/64] 2023-07-03 02:25:21,792 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpua014:1504761:1504761 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0>
+gpua014:1504761:1504761 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua014:1504761:1504761 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpua014:0/64] 2023-07-03 02:25:26,795 (trainer:284) INFO: 6/100epoch started
+[gpua014:0/64] 2023-07-03 02:25:26,841 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua014:0/64] 2023-07-03 02:25:48,338 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-07-03 02:25:52,384 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"}
+  preprocess: )
+[gpua014:0/64] 2023-07-03 02:25:52,384 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9,
+[gpua014:0/64] 2023-07-03 02:25:52,392 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+gpua018:3479289:3479289 [2] NCCL INFO cudaDriverVersion 12010
+gpua018:3479289:3479289 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.18<0>
+gpua018:3479289:3479289 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua018:3479289:3479368 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.18<0>
+gpua018:3479289:3479368 [2] NCCL INFO Using network IB
+gpua018:3479289:3479368 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua018:3479289:3479368 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpua018:3479289:3479368 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read
+gpua018:3479289:3479368 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read
+gpua018:3479289:3479368 [2] NCCL INFO Connected all rings
+gpua018:3479289:3479368 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read
+gpua018:3479289:3479368 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read
+gpua018:3479289:3479368 [2] NCCL INFO Connected all trees
+gpua018:3479289:3479368 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua018:3479289:3479368 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua018:3479289:3479368 [2] NCCL INFO comm 0x50f23dd0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua018:3479290:3479290 [3] NCCL INFO cudaDriverVersion 12010
+gpua018:3479290:3479290 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.18<0>
+gpua018:3479290:3479290 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua018:3479290:3479367 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.18<0>
+gpua018:3479290:3479367 [3] NCCL INFO Using network IB
+gpua018:3479290:3479367 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua018:3479290:3479367 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpua018:3479290:3479367 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua018:3479290:3479367 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua018:3479290:3479367 [3] NCCL INFO Connected all rings
+gpua018:3479290:3479367 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua018:3479290:3479367 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua018:3479290:3479367 [3] NCCL INFO Connected all trees
+gpua018:3479290:3479367 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua018:3479290:3479367 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua018:3479290:3479367 [3] NCCL INFO comm 0xb9d17510 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua018:3479288:3479288 [1] NCCL INFO cudaDriverVersion 12010
+gpua018:3479288:3479288 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.18<0>
+gpua018:3479288:3479288 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua018:3479288:3479369 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.18<0>
+gpua018:3479288:3479369 [1] NCCL INFO Using network IB
+gpua018:3479288:3479369 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua018:3479288:3479369 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpua018:3479288:3479369 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua018:3479288:3479369 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua018:3479288:3479369 [1] NCCL INFO Connected all rings
+gpua018:3479288:3479369 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpua018:3479288:3479369 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpua018:3479288:3479369 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua018:3479288:3479369 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua018:3479288:3479369 [1] NCCL INFO Connected all trees
+gpua018:3479288:3479369 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua018:3479288:3479369 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua018:3479288:3479369 [1] NCCL INFO comm 0x5176eca0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua022:3213423:3213423 [2] NCCL INFO cudaDriverVersion 12010
+gpua022:3213423:3213423 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.22<0>
+gpua022:3213423:3213423 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua022:3213423:3213495 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.22<0>
+gpua022:3213423:3213495 [2] NCCL INFO Using network IB
+gpua022:3213423:3213495 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua022:3213423:3213495 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpua022:3213423:3213495 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua022:3213423:3213495 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua022:3213423:3213495 [2] NCCL INFO Connected all rings
+gpua022:3213423:3213495 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC/read
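For orientation in the per-rank NCCL chatter that follows: the job spans 16 nodes with 4 GPUs each, 64 ranks in total (the "nranks 64" above), and the rank printed at "Init COMPLETE" is the node's position in the allocation times four plus the local GPU index, so gpua014 holds ranks 0-3, gpua018 ranks 12-15, gpua022 ranks 24-27, and so on. A sketch of that mapping under the usual one-srun-task-per-node launch (the helper name and the use of SLURM_PROCID here are my assumptions for illustration, not this script's code):

    import os

    def global_rank(local_gpu, gpus_per_node=4):
        # One SLURM task per node; each task forks one worker per GPU, so
        # the NCCL rank is node_index * gpus_per_node + local GPU index.
        node_index = int(os.environ["SLURM_PROCID"])
        return node_index * gpus_per_node + local_gpu

    # e.g. node_index 3 (gpua018) and GPU 2 give rank 14, matching
    # "gpua018 ... [2] ... rank 14 nranks 64 ... Init COMPLETE" above.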
+gpua022:3213423:3213495 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC/read +gpua022:3213423:3213495 [2] NCCL INFO Connected all trees +gpua022:3213423:3213495 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua022:3213423:3213495 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua022:3213423:3213495 [2] NCCL INFO comm 0xb5a97440 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua018:3479287:3479287 [0] NCCL INFO cudaDriverVersion 12010 +gpua018:3479287:3479287 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.18<0> +gpua018:3479287:3479287 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua018:3479287:3479366 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.18<0> +gpua018:3479287:3479366 [0] NCCL INFO Using network IB +gpua018:3479287:3479366 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua018:3479287:3479366 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpua018:3479287:3479366 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua018:3479287:3479366 [0] NCCL INFO Connected all rings +gpua018:3479287:3479366 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Connected all trees +gpua018:3479287:3479366 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua018:3479287:3479366 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua018:3479287:3479366 [0] NCCL INFO comm 0xa20beed0 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua022:3213424:3213424 [3] NCCL INFO cudaDriverVersion 12010 +gpua022:3213424:3213424 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.22<0> +gpua022:3213424:3213424 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua022:3213424:3213496 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.22<0> +gpua022:3213424:3213496 [3] NCCL INFO Using network IB +gpua022:3213424:3213496 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua022:3213424:3213496 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpua022:3213424:3213496 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpua022:3213424:3213496 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpua022:3213424:3213496 [3] NCCL INFO Connected all rings +gpua022:3213424:3213496 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC/read +gpua022:3213424:3213496 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC/read +gpua022:3213424:3213496 [3] NCCL INFO Connected all trees +gpua022:3213424:3213496 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 
512 | 512 +gpua022:3213424:3213496 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua022:3213424:3213496 [3] NCCL INFO comm 0x4eeb3510 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua041:2383593:2383593 [1] NCCL INFO cudaDriverVersion 12010 +gpua041:2383593:2383593 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.41<0> +gpua041:2383593:2383593 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua041:2383593:2383733 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.41<0> +gpua041:2383593:2383733 [1] NCCL INFO Using network IB +gpua041:2383593:2383733 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua041:2383593:2383733 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpua041:2383593:2383733 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua041:2383593:2383733 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua041:2383593:2383733 [1] NCCL INFO Connected all rings +gpua041:2383593:2383733 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpua041:2383593:2383733 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpua041:2383593:2383733 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua041:2383593:2383733 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua041:2383593:2383733 [1] NCCL INFO Connected all trees +gpua041:2383593:2383733 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua041:2383593:2383733 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua041:2383593:2383733 [1] NCCL INFO comm 0x4f62e590 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua022:3213422:3213422 [1] NCCL INFO cudaDriverVersion 12010 +gpua022:3213422:3213422 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.22<0> +gpua022:3213422:3213422 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua022:3213422:3213494 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.22<0> +gpua022:3213422:3213494 [1] NCCL INFO Using network IB +gpua022:3213422:3213494 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua022:3213422:3213494 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpua022:3213422:3213494 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC/read +gpua022:3213422:3213494 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC/read +gpua022:3213422:3213494 [1] NCCL INFO Connected all rings +gpua022:3213422:3213494 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpua022:3213422:3213494 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpua022:3213422:3213494 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC/read +gpua022:3213422:3213494 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC/read +gpua022:3213422:3213494 [1] NCCL INFO Connected all trees +gpua022:3213422:3213494 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua022:3213422:3213494 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua022:3213422:3213494 [1] NCCL INFO comm 0xba8d1d30 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua063:1316628:1316628 [3] NCCL INFO cudaDriverVersion 12010 +gpua063:1316628:1316628 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:1316628:1316628 [3] NCCL INFO NET/Plugin : No plugin 
found (libnccl-net.so), using internal implementation +gpua063:1316628:1316705 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:1316628:1316705 [3] NCCL INFO Using network IB +gpua063:1316628:1316705 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua063:1316628:1316705 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpua063:1316628:1316705 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpua063:1316628:1316705 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpua063:1316628:1316705 [3] NCCL INFO Connected all rings +gpua063:1316628:1316705 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC/read +gpua063:1316628:1316705 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC/read +gpua063:1316628:1316705 [3] NCCL INFO Connected all trees +gpua063:1316628:1316705 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:1316628:1316705 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:1316628:1316705 [3] NCCL INFO comm 0x50f53420 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua091:1092313:1092313 [2] NCCL INFO cudaDriverVersion 12010 +gpua091:1092313:1092313 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.91<0> +gpua091:1092313:1092313 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua091:1092313:1092405 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.91<0> +gpua091:1092313:1092405 [2] NCCL INFO Using network IB +gpua091:1092313:1092405 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua091:1092313:1092405 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpua091:1092313:1092405 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC/read +gpua091:1092313:1092405 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC/read +gpua091:1092313:1092405 [2] NCCL INFO Connected all rings +gpua091:1092313:1092405 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC/read +gpua091:1092313:1092405 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC/read +gpua091:1092313:1092405 [2] NCCL INFO Connected all trees +gpua091:1092313:1092405 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua091:1092313:1092405 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua091:1092313:1092405 [2] NCCL INFO comm 0xb9112ed0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua063:1316626:1316626 [1] NCCL INFO cudaDriverVersion 12010 +gpua063:1316626:1316626 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:1316626:1316626 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:1316626:1316707 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:1316626:1316707 [1] NCCL INFO Using network IB +gpua063:1316626:1316707 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua063:1316626:1316707 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpua063:1316626:1316707 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC/read +gpua063:1316626:1316707 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC/read +gpua063:1316626:1316707 [1] NCCL INFO Connected all rings +gpua063:1316626:1316707 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpua063:1316626:1316707 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via 
NET/IB/0 +gpua063:1316626:1316707 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC/read +gpua063:1316626:1316707 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC/read +gpua063:1316626:1316707 [1] NCCL INFO Connected all trees +gpua063:1316626:1316707 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:1316626:1316707 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:1316626:1316707 [1] NCCL INFO comm 0x50a76db0 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua093:1851602:1851602 [2] NCCL INFO cudaDriverVersion 12010 +gpua093:1851602:1851602 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.93<0> +gpua093:1851602:1851602 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua093:1851602:1851687 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.93<0> +gpua093:1851602:1851687 [2] NCCL INFO Using network IB +gpua093:1851602:1851687 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua093:1851602:1851687 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpua093:1851602:1851687 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua093:1851602:1851687 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua093:1851602:1851687 [2] NCCL INFO Connected all rings +gpua093:1851602:1851687 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua093:1851602:1851687 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua093:1851602:1851687 [2] NCCL INFO Connected all trees +gpua093:1851602:1851687 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua093:1851602:1851687 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua093:1851602:1851687 [2] NCCL INFO comm 0x8eeaee30 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua093:1851600:1851600 [0] NCCL INFO cudaDriverVersion 12010 +gpua093:1851600:1851600 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.93<0> +gpua093:1851600:1851600 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua093:1851600:1851684 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.93<0> +gpua093:1851600:1851684 [0] NCCL INFO Using network IB +gpua093:1851600:1851684 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua093:1851600:1851684 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua093:1851600:1851684 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua093:1851600:1851684 [0] NCCL INFO Connected all rings +gpua093:1851600:1851684 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpua093:1851600:1851684 
[0] NCCL INFO Connected all trees +gpua093:1851600:1851684 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua093:1851600:1851684 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua093:1851600:1851684 [0] NCCL INFO comm 0x9c356d50 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua096:2182299:2182299 [1] NCCL INFO cudaDriverVersion 12010 +gpua096:2182299:2182299 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.96<0> +gpua096:2182299:2182299 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua096:2182299:2182385 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.96<0> +gpua096:2182299:2182385 [1] NCCL INFO Using network IB +gpua096:2182299:2182385 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua096:2182299:2182385 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpua096:2182299:2182385 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua096:2182299:2182385 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua096:2182299:2182385 [1] NCCL INFO Connected all rings +gpua096:2182299:2182385 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua096:2182299:2182385 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua096:2182299:2182385 [1] NCCL INFO Connected all trees +gpua096:2182299:2182385 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua096:2182299:2182385 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua096:2182299:2182385 [1] NCCL INFO comm 0x50cf5510 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua063:1316627:1316627 [2] NCCL INFO cudaDriverVersion 12010 +gpua063:1316627:1316627 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:1316627:1316627 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:1316627:1316706 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:1316627:1316706 [2] NCCL INFO Using network IB +gpua063:1316627:1316706 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua063:1316627:1316706 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpua063:1316627:1316706 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC/read +gpua063:1316627:1316706 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC/read +gpua063:1316627:1316706 [2] NCCL INFO Connected all rings +gpua063:1316627:1316706 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC/read +gpua063:1316627:1316706 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC/read +gpua063:1316627:1316706 [2] NCCL INFO Connected all trees +gpua063:1316627:1316706 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:1316627:1316706 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:1316627:1316706 [2] NCCL INFO comm 0x50e1a4d0 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua014:1504764:1504764 [3] NCCL INFO cudaDriverVersion 12010 +gpua014:1504764:1504764 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0> +gpua014:1504764:1504764 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua014:1504764:1504833 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0> +gpua014:1504764:1504833 [3] NCCL INFO Using network IB +gpua014:1504764:1504833 [3] NCCL INFO Setting affinity for GPU 3 to ffff 
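The "Trees" lines encode NCCL's double binary tree, one entry per channel: up to three child ranks, then the rank itself, then its parent, with -1 marking an empty slot. A rough parser for the printed format (my reading of the log lines, not an NCCL API):

    import re

    # "Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13" -> per channel:
    # the children before "->", then the rank, then its parent (-1 = none).
    _TREE = re.compile(r"\[(\d+)\] (-?\d+)/(-?\d+)/(-?\d+)->(\d+)->(-?\d+)")

    def parse_trees(line):
        return [
            {
                "channel": int(ch),
                "children": [int(x) for x in (c0, c1, c2) if int(x) >= 0],
                "rank": int(rank),
                "parent": int(parent),
            }
            for ch, c0, c1, c2, rank, parent in _TREE.findall(line)
        ]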
+gpua014:1504764:1504833 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpua014:1504764:1504833 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpua014:1504764:1504833 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpua014:1504764:1504833 [3] NCCL INFO Connected all rings +gpua014:1504764:1504833 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua014:1504764:1504833 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua014:1504764:1504833 [3] NCCL INFO Connected all trees +gpua014:1504764:1504833 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua014:1504764:1504833 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua014:1504764:1504833 [3] NCCL INFO comm 0x90a5180 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua091:1092311:1092311 [0] NCCL INFO cudaDriverVersion 12010 +gpua091:1092311:1092311 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.91<0> +gpua091:1092311:1092311 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua091:1092311:1092403 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.91<0> +gpua091:1092311:1092403 [0] NCCL INFO Using network IB +gpua091:1092311:1092403 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua091:1092311:1092403 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpua091:1092311:1092403 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC/read +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC/read +gpua091:1092311:1092403 [0] NCCL INFO Connected all rings +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Connected all trees +gpua091:1092311:1092403 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua091:1092311:1092403 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua091:1092311:1092403 [0] NCCL INFO comm 0xb51c2df0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua021:3546921:3546921 [1] NCCL INFO cudaDriverVersion 12010 +gpua021:3546921:3546921 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.21<0> +gpua021:3546921:3546921 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua021:3546921:3547001 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.21<0> +gpua021:3546921:3547001 [1] NCCL INFO Using network IB +gpua021:3546921:3547001 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua021:3546921:3547001 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpua021:3546921:3547001 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC/read +gpua021:3546921:3547001 [1] NCCL INFO 
Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC/read +gpua021:3546921:3547001 [1] NCCL INFO Connected all rings +gpua021:3546921:3547001 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpua021:3546921:3547001 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpua021:3546921:3547001 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC/read +gpua021:3546921:3547001 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC/read +gpua021:3546921:3547001 [1] NCCL INFO Connected all trees +gpua021:3546921:3547001 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua021:3546921:3547001 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua021:3546921:3547001 [1] NCCL INFO comm 0xb47c80d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua016:2146304:2146304 [0] NCCL INFO cudaDriverVersion 12010 +gpua016:2146304:2146304 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:2146304:2146304 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:2146304:2146395 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:2146304:2146395 [0] NCCL INFO Using network IB +gpua016:2146304:2146395 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua016:2146304:2146395 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua016:2146304:2146395 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua016:2146304:2146395 [0] NCCL INFO Connected all rings +gpua016:2146304:2146395 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Connected all trees +gpua016:2146304:2146395 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:2146304:2146395 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:2146304:2146395 [0] NCCL INFO comm 0x94649e0 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua016:2146305:2146305 [1] NCCL INFO cudaDriverVersion 12010 +gpua016:2146305:2146305 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:2146305:2146305 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:2146305:2146397 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:2146305:2146397 [1] NCCL INFO Using network IB +gpua016:2146305:2146397 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua016:2146305:2146397 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpua016:2146305:2146397 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua016:2146305:2146397 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via 
P2P/IPC/read +gpua016:2146305:2146397 [1] NCCL INFO Connected all rings +gpua016:2146305:2146397 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpua016:2146305:2146397 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpua016:2146305:2146397 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua016:2146305:2146397 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua016:2146305:2146397 [1] NCCL INFO Connected all trees +gpua016:2146305:2146397 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:2146305:2146397 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:2146305:2146397 [1] NCCL INFO comm 0x505b7da0 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua022:3213421:3213421 [0] NCCL INFO cudaDriverVersion 12010 +gpua022:3213421:3213421 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.22<0> +gpua022:3213421:3213421 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua022:3213421:3213497 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.22<0> +gpua022:3213421:3213497 [0] NCCL INFO Using network IB +gpua022:3213421:3213497 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua022:3213421:3213497 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read +gpua022:3213421:3213497 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read +gpua022:3213421:3213497 [0] NCCL INFO Connected all rings +gpua022:3213421:3213497 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Connected all trees +gpua022:3213421:3213497 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua022:3213421:3213497 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua022:3213421:3213497 [0] NCCL INFO comm 0x5127a110 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua020:3382569:3382569 [3] NCCL INFO cudaDriverVersion 12010 +gpua020:3382569:3382569 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.20<0> +gpua020:3382569:3382569 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua020:3382569:3382644 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.20<0> +gpua020:3382569:3382644 [3] NCCL INFO Using network IB +gpua020:3382569:3382644 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua020:3382569:3382644 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpua020:3382569:3382644 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua020:3382569:3382644 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua020:3382569:3382644 
[3] NCCL INFO Connected all rings +gpua020:3382569:3382644 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua020:3382569:3382644 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua020:3382569:3382644 [3] NCCL INFO Connected all trees +gpua020:3382569:3382644 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua020:3382569:3382644 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua020:3382569:3382644 [3] NCCL INFO comm 0x50adbf90 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua021:3546922:3546922 [2] NCCL INFO cudaDriverVersion 12010 +gpua021:3546922:3546922 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.21<0> +gpua021:3546922:3546922 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua021:3546922:3547000 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.21<0> +gpua021:3546922:3547000 [2] NCCL INFO Using network IB +gpua021:3546922:3547000 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua021:3546922:3547000 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpua021:3546922:3547000 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC/read +gpua021:3546922:3547000 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC/read +gpua021:3546922:3547000 [2] NCCL INFO Connected all rings +gpua021:3546922:3547000 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC/read +gpua021:3546922:3547000 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC/read +gpua021:3546922:3547000 [2] NCCL INFO Connected all trees +gpua021:3546922:3547000 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua021:3546922:3547000 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua021:3546922:3547000 [2] NCCL INFO comm 0xb64560d0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua041:2383592:2383592 [0] NCCL INFO cudaDriverVersion 12010 +gpua041:2383592:2383592 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.41<0> +gpua041:2383592:2383592 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua041:2383592:2383731 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.41<0> +gpua041:2383592:2383731 [0] NCCL INFO Using network IB +gpua041:2383592:2383731 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua041:2383592:2383731 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpua041:2383592:2383731 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua041:2383592:2383731 [0] NCCL INFO Connected all rings +gpua041:2383592:2383731 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 
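Each "Init COMPLETE" above marks one worker finishing communicator setup. Stripped of ESPnet's plumbing, the per-worker step is the standard PyTorch one, roughly as below (a sketch assuming an env:// rendezvous with MASTER_ADDR/MASTER_PORT exported; the actual run is wired up internally by the espnet2 trainer):

    import torch
    import torch.distributed as dist

    def join_communicator(rank, world_size=64):
        # Pin this worker to its GPU (4 per node), then open the NCCL comm;
        # init blocks until all 64 ranks have connected rings and trees.
        torch.cuda.set_device(rank % 4)
        dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)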
+gpua041:2383592:2383731 [0] NCCL INFO Connected all trees +gpua041:2383592:2383731 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua041:2383592:2383731 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua041:2383592:2383731 [0] NCCL INFO comm 0x500f1b90 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua015:2678599:2678599 [1] NCCL INFO cudaDriverVersion 12010 +gpua015:2678599:2678599 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.15<0> +gpua015:2678599:2678599 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua015:2678599:2678672 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.15<0> +gpua015:2678599:2678672 [1] NCCL INFO Using network IB +gpua015:2678599:2678672 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua015:2678599:2678672 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpua015:2678599:2678672 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua015:2678599:2678672 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua015:2678599:2678672 [1] NCCL INFO Connected all rings +gpua015:2678599:2678672 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpua015:2678599:2678672 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpua015:2678599:2678672 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua015:2678599:2678672 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua015:2678599:2678672 [1] NCCL INFO Connected all trees +gpua015:2678599:2678672 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua015:2678599:2678672 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua015:2678599:2678672 [1] NCCL INFO comm 0xb6725330 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua088:4022850:4022850 [1] NCCL INFO cudaDriverVersion 12010 +gpua088:4022850:4022850 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.88<0> +gpua088:4022850:4022850 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua088:4022850:4022934 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.88<0> +gpua088:4022850:4022934 [1] NCCL INFO Using network IB +gpua088:4022850:4022934 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua088:4022850:4022934 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpua088:4022850:4022934 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC/read +gpua088:4022850:4022934 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC/read +gpua088:4022850:4022934 [1] NCCL INFO Connected all rings +gpua088:4022850:4022934 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpua088:4022850:4022934 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpua062:3999115:3999115 [0] NCCL INFO cudaDriverVersion 12010 +gpua062:3999115:3999115 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> +gpua062:3999115:3999115 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua062:3999115:3999187 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> +gpua062:3999115:3999187 [0] NCCL INFO Using network IB +gpua062:3999115:3999187 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua062:3999115:3999187 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpua062:3999115:3999187 [0] NCCL INFO 
Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua062:3999115:3999187 [0] NCCL INFO Connected all rings +gpua088:4022850:4022934 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC/read +gpua088:4022850:4022934 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC/read +gpua088:4022850:4022934 [1] NCCL INFO Connected all trees +gpua088:4022850:4022934 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua088:4022850:4022934 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua088:4022850:4022934 [1] NCCL INFO comm 0xa543f510 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Connected all trees +gpua062:3999115:3999187 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua062:3999115:3999187 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua062:3999115:3999187 [0] NCCL INFO comm 0x4f5279e0 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua021:3546923:3546923 [3] NCCL INFO cudaDriverVersion 12010 +gpua021:3546923:3546923 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.21<0> +gpua021:3546923:3546923 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua021:3546923:3547002 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.21<0> +gpua021:3546923:3547002 [3] NCCL INFO Using network IB +gpua021:3546923:3547002 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua021:3546923:3547002 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpua021:3546923:3547002 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpua021:3546923:3547002 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpua021:3546923:3547002 [3] NCCL INFO Connected all rings +gpua021:3546923:3547002 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC/read +gpua021:3546923:3547002 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC/read +gpua021:3546923:3547002 [3] NCCL INFO Connected all trees +gpua021:3546923:3547002 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua021:3546923:3547002 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua021:3546923:3547002 [3] NCCL INFO comm 0x51142940 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua091:1092312:1092312 [1] NCCL INFO cudaDriverVersion 12010 +gpua091:1092312:1092312 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.91<0> +gpua091:1092312:1092312 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua091:1092312:1092406 [1] NCCL INFO NET/IB : Using 
[0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.91<0> +gpua091:1092312:1092406 [1] NCCL INFO Using network IB +gpua091:1092312:1092406 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua091:1092312:1092406 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpua091:1092312:1092406 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua091:1092312:1092406 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua091:1092312:1092406 [1] NCCL INFO Connected all rings +gpua091:1092312:1092406 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpua091:1092312:1092406 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpua091:1092312:1092406 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua091:1092312:1092406 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua091:1092312:1092406 [1] NCCL INFO Connected all trees +gpua091:1092312:1092406 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua091:1092312:1092406 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua091:1092312:1092406 [1] NCCL INFO comm 0xb15c78d0 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua041:2383595:2383595 [3] NCCL INFO cudaDriverVersion 12010 +gpua041:2383595:2383595 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.41<0> +gpua041:2383595:2383595 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua041:2383595:2383732 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.41<0> +gpua041:2383595:2383732 [3] NCCL INFO Using network IB +gpua041:2383595:2383732 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua041:2383595:2383732 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpua041:2383595:2383732 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua041:2383595:2383732 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua041:2383595:2383732 [3] NCCL INFO Connected all rings +gpua041:2383595:2383732 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua041:2383595:2383732 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua041:2383595:2383732 [3] NCCL INFO Connected all trees +gpua041:2383595:2383732 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua041:2383595:2383732 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua041:2383595:2383732 [3] NCCL INFO comm 0x50d6c2c0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua014:1504763:1504763 [2] NCCL INFO cudaDriverVersion 12010 +gpua014:1504763:1504763 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0> +gpua014:1504763:1504763 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua014:1504763:1504834 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0> +gpua014:1504763:1504834 [2] NCCL INFO Using network IB +gpua014:1504763:1504834 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua014:1504763:1504834 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpua014:1504763:1504834 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua014:1504763:1504834 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua014:1504763:1504834 [2] NCCL INFO Connected all rings +gpua014:1504763:1504834 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua014:1504763:1504834 
[2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua014:1504763:1504834 [2] NCCL INFO Connected all trees +gpua014:1504763:1504834 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua014:1504763:1504834 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua014:1504763:1504834 [2] NCCL INFO comm 0x8d97bae0 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua020:3382567:3382567 [1] NCCL INFO cudaDriverVersion 12010 +gpua020:3382567:3382567 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.20<0> +gpua020:3382567:3382567 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua020:3382567:3382643 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.20<0> +gpua020:3382567:3382643 [1] NCCL INFO Using network IB +gpua020:3382567:3382643 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua020:3382567:3382643 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpua020:3382567:3382643 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua020:3382567:3382643 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua020:3382567:3382643 [1] NCCL INFO Connected all rings +gpua020:3382567:3382643 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpua020:3382567:3382643 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpua020:3382567:3382643 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua020:3382567:3382643 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua020:3382567:3382643 [1] NCCL INFO Connected all trees +gpua020:3382567:3382643 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua020:3382567:3382643 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua020:3382567:3382643 [1] NCCL INFO comm 0x50206940 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua062:3999116:3999116 [1] NCCL INFO cudaDriverVersion 12010 +gpua062:3999116:3999116 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> +gpua062:3999116:3999116 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua062:3999116:3999189 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> +gpua062:3999116:3999189 [1] NCCL INFO Using network IB +gpua062:3999116:3999189 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua062:3999116:3999189 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpua062:3999116:3999189 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua062:3999116:3999189 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua062:3999116:3999189 [1] NCCL INFO Connected all rings +gpua062:3999116:3999189 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpua062:3999116:3999189 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpua062:3999116:3999189 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua062:3999116:3999189 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua062:3999116:3999189 [1] NCCL INFO Connected all trees +gpua062:3999116:3999189 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua062:3999116:3999189 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua062:3999116:3999189 [1] NCCL INFO comm 0x8302c90 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE 
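Two transport types appear in the channel lines: "P2P/IPC/read" between GPUs on the same node, and "NET/IB" (RoCE over mlx5_0) for hops between nodes. Whether the direct peer-to-peer path exists between local GPUs can be probed from PyTorch (a quick standalone check, unrelated to this recipe's scripts):

    import torch

    # List GPU pairs on this node that support the peer-to-peer access
    # NCCL reports as "via P2P/IPC" in the log above.
    n = torch.cuda.device_count()
    for i in range(n):
        for j in range(n):
            if i != j and torch.cuda.can_device_access_peer(i, j):
                print(f"GPU {i} -> GPU {j}: P2P capable")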
+gpua015:2678598:2678598 [0] NCCL INFO cudaDriverVersion 12010 +gpua015:2678598:2678598 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.15<0> +gpua015:2678598:2678598 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua015:2678598:2678673 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.15<0> +gpua015:2678598:2678673 [0] NCCL INFO Using network IB +gpua015:2678598:2678673 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua015:2678598:2678673 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpua015:2678598:2678673 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC/read +gpua015:2678598:2678673 [0] NCCL INFO Connected all rings +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Connected all trees +gpua015:2678598:2678673 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua015:2678598:2678673 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua015:2678598:2678673 [0] NCCL INFO comm 0x5031b3d0 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua016:2146307:2146307 [3] NCCL INFO cudaDriverVersion 12010 +gpua016:2146307:2146307 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:2146307:2146307 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:2146307:2146396 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:2146307:2146396 [3] NCCL INFO Using network IB +gpua016:2146307:2146396 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua016:2146307:2146396 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpua016:2146307:2146396 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua016:2146307:2146396 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua016:2146307:2146396 [3] NCCL INFO Connected all rings +gpua016:2146307:2146396 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua016:2146307:2146396 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua016:2146307:2146396 [3] NCCL INFO Connected all trees +gpua016:2146307:2146396 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:2146307:2146396 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:2146307:2146396 [3] NCCL INFO comm 0xb69ce0b0 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua068:4120162:4120162 [0] NCCL INFO cudaDriverVersion 12010 +gpua068:4120162:4120162 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.68<0> +gpua068:4120162:4120162 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal 
implementation +gpua068:4120162:4120251 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.68<0> +gpua068:4120162:4120251 [0] NCCL INFO Using network IB +gpua068:4120162:4120251 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua068:4120162:4120251 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpua068:4120162:4120251 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua068:4120162:4120251 [0] NCCL INFO Connected all rings +gpua068:4120162:4120251 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Connected all trees +gpua068:4120162:4120251 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua068:4120162:4120251 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua068:4120162:4120251 [0] NCCL INFO comm 0x955bda0 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua068:4120163:4120163 [1] NCCL INFO cudaDriverVersion 12010 +gpua068:4120163:4120163 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.68<0> +gpua068:4120163:4120163 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua068:4120163:4120252 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.68<0> +gpua068:4120163:4120252 [1] NCCL INFO Using network IB +gpua068:4120163:4120252 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua068:4120163:4120252 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpua068:4120163:4120252 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua068:4120163:4120252 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua068:4120163:4120252 [1] NCCL INFO Connected all rings +gpua068:4120163:4120252 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpua068:4120163:4120252 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpua068:4120163:4120252 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua068:4120163:4120252 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua068:4120163:4120252 [1] NCCL INFO Connected all trees +gpua068:4120163:4120252 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua068:4120163:4120252 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua068:4120163:4120252 [1] NCCL INFO comm 0x4ffe8620 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua068:4120165:4120165 [3] NCCL INFO cudaDriverVersion 12010 +gpua068:4120165:4120165 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.68<0> +gpua068:4120165:4120165 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation 
+gpua068:4120165:4120250 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.68<0> +gpua068:4120165:4120250 [3] NCCL INFO Using network IB +gpua068:4120165:4120250 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua068:4120165:4120250 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpua068:4120165:4120250 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua068:4120165:4120250 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua068:4120165:4120250 [3] NCCL INFO Connected all rings +gpua068:4120165:4120250 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua068:4120165:4120250 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua068:4120165:4120250 [3] NCCL INFO Connected all trees +gpua068:4120165:4120250 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua068:4120165:4120250 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua068:4120165:4120250 [3] NCCL INFO comm 0x8d5e2480 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua068:4120164:4120164 [2] NCCL INFO cudaDriverVersion 12010 +gpua068:4120164:4120164 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.68<0> +gpua068:4120164:4120164 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua068:4120164:4120253 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.68<0> +gpua068:4120164:4120253 [2] NCCL INFO Using network IB +gpua068:4120164:4120253 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua068:4120164:4120253 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpua068:4120164:4120253 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua068:4120164:4120253 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua068:4120164:4120253 [2] NCCL INFO Connected all rings +gpua068:4120164:4120253 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua068:4120164:4120253 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua068:4120164:4120253 [2] NCCL INFO Connected all trees +gpua068:4120164:4120253 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua068:4120164:4120253 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua068:4120164:4120253 [2] NCCL INFO comm 0xb899ffd0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua060:2765420:2765420 [0] NCCL INFO cudaDriverVersion 12010 +gpua060:2765420:2765420 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2765420:2765420 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2765420:2765495 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2765420:2765495 [0] NCCL INFO Using network IB +gpua060:2765420:2765495 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua060:2765420:2765495 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua060:2765420:2765495 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua060:2765420:2765495 [0] NCCL INFO Connected all rings +gpua060:2765420:2765495 [0] NCCL INFO Channel 
01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Connected all trees +gpua060:2765420:2765495 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2765420:2765495 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2765420:2765495 [0] NCCL INFO comm 0x8ee9a7d0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua060:2765422:2765422 [2] NCCL INFO cudaDriverVersion 12010 +gpua060:2765422:2765422 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2765422:2765422 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2765422:2765496 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2765422:2765496 [2] NCCL INFO Using network IB +gpua060:2765422:2765496 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua060:2765422:2765496 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpua060:2765422:2765496 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua060:2765422:2765496 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua060:2765422:2765496 [2] NCCL INFO Connected all rings +gpua060:2765422:2765496 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua060:2765422:2765496 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua060:2765422:2765496 [2] NCCL INFO Connected all trees +gpua060:2765422:2765496 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2765422:2765496 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2765422:2765496 [2] NCCL INFO comm 0x50dc0f50 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua041:2383594:2383594 [2] NCCL INFO cudaDriverVersion 12010 +gpua041:2383594:2383594 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.41<0> +gpua041:2383594:2383594 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua041:2383594:2383734 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.41<0> +gpua041:2383594:2383734 [2] NCCL INFO Using network IB +gpua041:2383594:2383734 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua041:2383594:2383734 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpua041:2383594:2383734 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua041:2383594:2383734 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua041:2383594:2383734 [2] NCCL INFO Connected all rings +gpua041:2383594:2383734 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua041:2383594:2383734 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua041:2383594:2383734 [2] NCCL INFO Connected all trees +gpua041:2383594:2383734 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua041:2383594:2383734 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua041:2383594:2383734 [2] NCCL INFO comm 
0xb8aa2570 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua062:3999117:3999117 [2] NCCL INFO cudaDriverVersion 12010 +gpua062:3999117:3999117 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> +gpua062:3999117:3999117 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua062:3999117:3999188 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> +gpua062:3999117:3999188 [2] NCCL INFO Using network IB +gpua062:3999117:3999188 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua062:3999117:3999188 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpua062:3999117:3999188 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua062:3999117:3999188 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua062:3999117:3999188 [2] NCCL INFO Connected all rings +gpua062:3999117:3999188 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua062:3999117:3999188 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua062:3999117:3999188 [2] NCCL INFO Connected all trees +gpua062:3999117:3999188 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua062:3999117:3999188 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua062:3999117:3999188 [2] NCCL INFO comm 0x5126ca20 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua015:2678600:2678600 [2] NCCL INFO cudaDriverVersion 12010 +gpua015:2678600:2678600 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.15<0> +gpua015:2678600:2678600 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua015:2678600:2678674 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.15<0> +gpua015:2678600:2678674 [2] NCCL INFO Using network IB +gpua015:2678600:2678674 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua015:2678600:2678674 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpua015:2678600:2678674 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua015:2678600:2678674 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua015:2678600:2678674 [2] NCCL INFO Connected all rings +gpua015:2678600:2678674 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua015:2678600:2678674 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua015:2678600:2678674 [2] NCCL INFO Connected all trees +gpua015:2678600:2678674 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua015:2678600:2678674 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua015:2678600:2678674 [2] NCCL INFO comm 0x4fb6eec0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua088:4022852:4022852 [3] NCCL INFO cudaDriverVersion 12010 +gpua088:4022852:4022852 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.88<0> +gpua088:4022852:4022852 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua088:4022852:4022933 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.88<0> +gpua088:4022852:4022933 [3] NCCL INFO Using network IB +gpua088:4022852:4022933 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua088:4022852:4022933 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpua088:4022852:4022933 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpua088:4022852:4022933 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 
+gpua088:4022852:4022933 [3] NCCL INFO Connected all rings +gpua088:4022852:4022933 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC/read +gpua088:4022852:4022933 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC/read +gpua088:4022852:4022933 [3] NCCL INFO Connected all trees +gpua088:4022852:4022933 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua088:4022852:4022933 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua088:4022852:4022933 [3] NCCL INFO comm 0xb0a25610 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua096:2182301:2182301 [3] NCCL INFO cudaDriverVersion 12010 +gpua096:2182301:2182301 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.96<0> +gpua096:2182301:2182301 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua096:2182301:2182382 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.96<0> +gpua096:2182301:2182382 [3] NCCL INFO Using network IB +gpua096:2182301:2182382 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua096:2182301:2182382 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpua096:2182301:2182382 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua096:2182301:2182382 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua096:2182301:2182382 [3] NCCL INFO Connected all rings +gpua096:2182301:2182382 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua096:2182301:2182382 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua096:2182301:2182382 [3] NCCL INFO Connected all trees +gpua096:2182301:2182382 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua096:2182301:2182382 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua096:2182301:2182382 [3] NCCL INFO comm 0xb7ff8a70 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua014:1504761:1504831 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0> +gpua014:1504761:1504831 [0] NCCL INFO Using network IB +gpua014:1504761:1504831 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua014:1504761:1504831 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua014:1504761:1504831 [0] NCCL INFO Connected all rings +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Connected all trees +gpua014:1504761:1504831 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua014:1504761:1504831 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per 
peer +gpua014:1504761:1504831 [0] NCCL INFO comm 0x4fbe12c0 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua093:1851603:1851603 [3] NCCL INFO cudaDriverVersion 12010 +gpua093:1851603:1851603 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.93<0> +gpua093:1851603:1851603 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua093:1851603:1851686 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.93<0> +gpua093:1851603:1851686 [3] NCCL INFO Using network IB +gpua093:1851603:1851686 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua093:1851603:1851686 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpua093:1851603:1851686 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua093:1851603:1851686 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua093:1851603:1851686 [3] NCCL INFO Connected all rings +gpua093:1851603:1851686 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua093:1851603:1851686 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua093:1851603:1851686 [3] NCCL INFO Connected all trees +gpua093:1851603:1851686 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua093:1851603:1851686 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua093:1851603:1851686 [3] NCCL INFO comm 0x51d23280 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua014:1504762:1504762 [1] NCCL INFO cudaDriverVersion 12010 +gpua014:1504762:1504762 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0> +gpua014:1504762:1504762 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua014:1504762:1504832 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0> +gpua014:1504762:1504832 [1] NCCL INFO Using network IB +gpua014:1504762:1504832 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua014:1504762:1504832 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpua014:1504762:1504832 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua014:1504762:1504832 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua014:1504762:1504832 [1] NCCL INFO Connected all rings +gpua014:1504762:1504832 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua014:1504762:1504832 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua014:1504762:1504832 [1] NCCL INFO Connected all trees +gpua014:1504762:1504832 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua014:1504762:1504832 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua014:1504762:1504832 [1] NCCL INFO comm 0xa6220c0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua062:3999118:3999118 [3] NCCL INFO cudaDriverVersion 12010 +gpua062:3999118:3999118 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> +gpua062:3999118:3999118 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua062:3999118:3999190 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> +gpua062:3999118:3999190 [3] NCCL INFO Using network IB +gpua062:3999118:3999190 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua062:3999118:3999190 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpua062:3999118:3999190 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua062:3999118:3999190 [3] NCCL INFO Channel 
01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua062:3999118:3999190 [3] NCCL INFO Connected all rings +gpua062:3999118:3999190 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua062:3999118:3999190 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua062:3999118:3999190 [3] NCCL INFO Connected all trees +gpua062:3999118:3999190 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua062:3999118:3999190 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua062:3999118:3999190 [3] NCCL INFO comm 0x4f6c8ad0 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua096:2182300:2182300 [2] NCCL INFO cudaDriverVersion 12010 +gpua096:2182300:2182300 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.96<0> +gpua096:2182300:2182300 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua096:2182300:2182383 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.96<0> +gpua096:2182300:2182383 [2] NCCL INFO Using network IB +gpua096:2182300:2182383 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua096:2182300:2182383 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpua096:2182300:2182383 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua096:2182300:2182383 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua096:2182300:2182383 [2] NCCL INFO Connected all rings +gpua096:2182300:2182383 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua096:2182300:2182383 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua096:2182300:2182383 [2] NCCL INFO Connected all trees +gpua096:2182300:2182383 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua096:2182300:2182383 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua096:2182300:2182383 [2] NCCL INFO comm 0x50397010 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua020:3382568:3382568 [2] NCCL INFO cudaDriverVersion 12010 +gpua020:3382568:3382568 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.20<0> +gpua020:3382568:3382568 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua020:3382568:3382642 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.20<0> +gpua020:3382568:3382642 [2] NCCL INFO Using network IB +gpua020:3382568:3382642 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua020:3382568:3382642 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpua020:3382568:3382642 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua020:3382568:3382642 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua020:3382568:3382642 [2] NCCL INFO Connected all rings +gpua020:3382568:3382642 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua020:3382568:3382642 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua020:3382568:3382642 [2] NCCL INFO Connected all trees +gpua020:3382568:3382642 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua020:3382568:3382642 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua020:3382568:3382642 [2] NCCL INFO comm 0x4f345750 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua093:1851601:1851601 [1] NCCL INFO cudaDriverVersion 12010 +gpua093:1851601:1851601 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.93<0> +gpua093:1851601:1851601 [1] 
NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua093:1851601:1851685 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.93<0> +gpua093:1851601:1851685 [1] NCCL INFO Using network IB +gpua093:1851601:1851685 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua093:1851601:1851685 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpua093:1851601:1851685 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua093:1851601:1851685 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua093:1851601:1851685 [1] NCCL INFO Connected all rings +gpua093:1851601:1851685 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpua093:1851601:1851685 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpua093:1851601:1851685 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua093:1851601:1851685 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua093:1851601:1851685 [1] NCCL INFO Connected all trees +gpua093:1851601:1851685 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua093:1851601:1851685 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua093:1851601:1851685 [1] NCCL INFO comm 0x8d2d4770 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua016:2146306:2146306 [2] NCCL INFO cudaDriverVersion 12010 +gpua016:2146306:2146306 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:2146306:2146306 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:2146306:2146394 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:2146306:2146394 [2] NCCL INFO Using network IB +gpua016:2146306:2146394 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua016:2146306:2146394 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpua016:2146306:2146394 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua016:2146306:2146394 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua016:2146306:2146394 [2] NCCL INFO Connected all rings +gpua016:2146306:2146394 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua016:2146306:2146394 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua016:2146306:2146394 [2] NCCL INFO Connected all trees +gpua016:2146306:2146394 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:2146306:2146394 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:2146306:2146394 [2] NCCL INFO comm 0xb80c42d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua063:1316625:1316625 [0] NCCL INFO cudaDriverVersion 12010 +gpua063:1316625:1316625 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:1316625:1316625 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:1316625:1316708 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:1316625:1316708 [0] NCCL INFO Using network IB +gpua063:1316625:1316708 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua063:1316625:1316708 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 
+gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC/read +gpua063:1316625:1316708 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC/read +gpua063:1316625:1316708 [0] NCCL INFO Connected all rings +gpua063:1316625:1316708 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Connected all trees +gpua063:1316625:1316708 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:1316625:1316708 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:1316625:1316708 [0] NCCL INFO comm 0x50020800 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua088:4022851:4022851 [2] NCCL INFO cudaDriverVersion 12010 +gpua088:4022851:4022851 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.88<0> +gpua088:4022851:4022851 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua088:4022851:4022931 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.88<0> +gpua088:4022851:4022931 [2] NCCL INFO Using network IB +gpua088:4022851:4022931 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua088:4022851:4022931 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpua088:4022851:4022931 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC/read +gpua088:4022851:4022931 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC/read +gpua088:4022851:4022931 [2] NCCL INFO Connected all rings +gpua088:4022851:4022931 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC/read +gpua088:4022851:4022931 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC/read +gpua088:4022851:4022931 [2] NCCL INFO Connected all trees +gpua088:4022851:4022931 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua088:4022851:4022931 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua088:4022851:4022931 [2] NCCL INFO comm 0x8b447690 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua021:3546920:3546920 [0] NCCL INFO cudaDriverVersion 12010 +gpua021:3546920:3546920 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.21<0> +gpua021:3546920:3546920 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua021:3546920:3547003 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.21<0> +gpua021:3546920:3547003 [0] NCCL INFO Using network IB +gpua021:3546920:3547003 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua021:3546920:3547003 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpua021:3546920:3547003 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC/read +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC/read +gpua021:3546920:3547003 [0] NCCL 
INFO Connected all rings +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Connected all trees +gpua021:3546920:3547003 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua021:3546920:3547003 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua021:3546920:3547003 [0] NCCL INFO comm 0x91f0590 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua020:3382566:3382566 [0] NCCL INFO cudaDriverVersion 12010 +gpua020:3382566:3382566 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.20<0> +gpua020:3382566:3382566 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua020:3382566:3382645 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.20<0> +gpua020:3382566:3382645 [0] NCCL INFO Using network IB +gpua020:3382566:3382645 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua020:3382566:3382645 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua020:3382566:3382645 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua020:3382566:3382645 [0] NCCL INFO Connected all rings +gpua020:3382566:3382645 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Connected all trees +gpua020:3382566:3382645 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua020:3382566:3382645 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua020:3382566:3382645 [0] NCCL INFO comm 0x4ff54b70 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua096:2182298:2182298 [0] NCCL INFO cudaDriverVersion 12010 +gpua096:2182298:2182298 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.96<0> +gpua096:2182298:2182298 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua096:2182298:2182384 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.96<0> +gpua096:2182298:2182384 [0] NCCL INFO Using network IB +gpua096:2182298:2182384 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua096:2182298:2182384 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpua096:2182298:2182384 [0] NCCL INFO 
Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua096:2182298:2182384 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua096:2182298:2182384 [0] NCCL INFO Connected all rings +gpua096:2182298:2182384 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Connected all trees +gpua096:2182298:2182384 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua096:2182298:2182384 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua096:2182298:2182384 [0] NCCL INFO comm 0x504ff500 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua091:1092314:1092314 [3] NCCL INFO cudaDriverVersion 12010 +gpua091:1092314:1092314 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.91<0> +gpua091:1092314:1092314 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua091:1092314:1092404 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.91<0> +gpua091:1092314:1092404 [3] NCCL INFO Using network IB +gpua091:1092314:1092404 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua091:1092314:1092404 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpua091:1092314:1092404 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpua091:1092314:1092404 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpua091:1092314:1092404 [3] NCCL INFO Connected all rings +gpua091:1092314:1092404 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC/read +gpua091:1092314:1092404 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC/read +gpua091:1092314:1092404 [3] NCCL INFO Connected all trees +gpua091:1092314:1092404 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua091:1092314:1092404 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua091:1092314:1092404 [3] NCCL INFO comm 0x508531a0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua088:4022849:4022849 [0] NCCL INFO cudaDriverVersion 12010 +gpua088:4022849:4022849 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.88<0> +gpua088:4022849:4022849 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua088:4022849:4022932 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.88<0> +gpua088:4022849:4022932 [0] NCCL INFO Using network IB +gpua088:4022849:4022932 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua088:4022849:4022932 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC/read +gpua088:4022849:4022932 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC/read +gpua088:4022849:4022932 [0] NCCL INFO Connected all rings 
+gpua088:4022849:4022932 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Connected all trees +gpua088:4022849:4022932 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua088:4022849:4022932 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua088:4022849:4022932 [0] NCCL INFO comm 0x8e555600 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua060:2765423:2765423 [3] NCCL INFO cudaDriverVersion 12010 +gpua060:2765423:2765423 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2765423:2765423 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2765423:2765497 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2765423:2765497 [3] NCCL INFO Using network IB +gpua060:2765423:2765497 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua060:2765423:2765497 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpua060:2765423:2765497 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua060:2765423:2765497 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua060:2765423:2765497 [3] NCCL INFO Connected all rings +gpua060:2765423:2765497 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua060:2765423:2765497 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua060:2765423:2765497 [3] NCCL INFO Connected all trees +gpua060:2765423:2765497 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2765423:2765497 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2765423:2765497 [3] NCCL INFO comm 0x8bb0f9c0 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua015:2678601:2678601 [3] NCCL INFO cudaDriverVersion 12010 +gpua015:2678601:2678601 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.15<0> +gpua015:2678601:2678601 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua015:2678601:2678671 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.15<0> +gpua015:2678601:2678671 [3] NCCL INFO Using network IB +gpua015:2678601:2678671 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua015:2678601:2678671 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpua015:2678601:2678671 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua015:2678601:2678671 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua015:2678601:2678671 [3] NCCL INFO Connected all rings +gpua015:2678601:2678671 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua015:2678601:2678671 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua015:2678601:2678671 [3] NCCL INFO Connected all trees +gpua015:2678601:2678671 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua015:2678601:2678671 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
+gpua015:2678601:2678671 [3] NCCL INFO comm 0x510fc310 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua060:2765421:2765421 [1] NCCL INFO cudaDriverVersion 12010
+gpua060:2765421:2765421 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0>
+gpua060:2765421:2765421 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua060:2765421:2765498 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0>
+gpua060:2765421:2765498 [1] NCCL INFO Using network IB
+gpua060:2765421:2765498 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua060:2765421:2765498 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpua060:2765421:2765498 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua060:2765421:2765498 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua060:2765421:2765498 [1] NCCL INFO Connected all rings
+gpua060:2765421:2765498 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpua060:2765421:2765498 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpua060:2765421:2765498 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua060:2765421:2765498 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua060:2765421:2765498 [1] NCCL INFO Connected all trees
+gpua060:2765421:2765498 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua060:2765421:2765498 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua060:2765421:2765498 [1] NCCL INFO comm 0xf88a8d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
+[Editor's note: the warning above was emitted verbatim once per worker process; the duplicates have been collapsed to a single instance.]
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
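The warning above comes from PyTorch's DistributedDataParallel reducer. A minimal sketch of where the flag is set, assuming torch.distributed has already been initialized by the SLURM launcher as in this run; `wrap_model` is a hypothetical helper, not ESPnet code:

```python
# Minimal sketch (hypothetical helper, not ESPnet code): how the flag in
# the warning above enters. find_unused_parameters=True makes DDP walk
# the autograd graph after every forward pass looking for parameters
# that received no gradient; if the model always uses all parameters,
# the flag can be turned off to skip that traversal.
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP

def wrap_model(model: nn.Module, device_id: int) -> DDP:
    # Assumes torch.distributed.init_process_group() has already run,
    # as the distributed launcher in this log arranges.
    return DDP(
        model.to(device_id),
        device_ids=[device_id],
        find_unused_parameters=False,  # drop the extra graph traversal
    )
```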
+[gpua014:0/64] 2023-07-03 02:32:39,445 (trainer:732) INFO: 6epoch:train:1-100batch: iter_time=1.461, forward_time=0.221, loss_ctc=100.864, loss_att=81.642, acc=0.628, loss=87.408, backward_time=0.761, grad_norm=121.325, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.116, optim0_lr0=1.767e-04, train_time=8.651
+[gpua014:0/64] 2023-07-03 02:34:18,760 (trainer:732) INFO: 6epoch:train:101-200batch: iter_time=1.090e-04, forward_time=0.105, loss_ctc=78.378, loss_att=64.090, acc=0.627, loss=68.377, backward_time=0.745, grad_norm=91.037, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.764e-04, train_time=1.987
+[gpua014:0/64] 2023-07-03 02:36:10,033 (trainer:732) INFO: 6epoch:train:201-300batch: iter_time=1.067e-04, forward_time=0.105, loss_ctc=94.972, loss_att=74.236, acc=0.638, loss=80.457, backward_time=0.765, grad_norm=103.237, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.762e-04, train_time=2.225
+[gpua014:0/64] 2023-07-03 02:37:48,817 (trainer:732) INFO: 6epoch:train:301-400batch: iter_time=1.078e-04, forward_time=0.104, loss_ctc=86.070, loss_att=66.443, acc=0.629, loss=72.331, backward_time=0.744, grad_norm=84.955, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.760e-04, train_time=1.975
+[gpua014:0/64] 2023-07-03 02:39:49,150 (trainer:732) INFO: 6epoch:train:401-500batch: iter_time=9.898e-05, forward_time=0.103, loss_ctc=93.921, loss_att=74.105, acc=0.650, loss=80.050, backward_time=0.782, grad_norm=98.045, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.110, optim0_lr0=1.758e-04, train_time=2.406
+[gpua014:0/64] 2023-07-03 02:41:38,188 (trainer:732) INFO: 6epoch:train:501-600batch: iter_time=9.613e-05, forward_time=0.110, loss_ctc=92.212, loss_att=75.110, acc=0.644, loss=80.241, backward_time=0.775, grad_norm=93.984, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.110, optim0_lr0=1.756e-04, train_time=2.181
+[gpua014:0/64] 2023-07-03 02:43:44,084 (trainer:732) INFO: 6epoch:train:601-700batch: iter_time=1.041e-04, forward_time=0.104, loss_ctc=79.713, loss_att=58.628, acc=0.633, loss=64.953, backward_time=0.786, grad_norm=86.077, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.754e-04, train_time=2.518
+[gpua014:0/64] 2023-07-03 02:46:01,463 (trainer:732) INFO: 6epoch:train:701-800batch: iter_time=3.962e-04, forward_time=0.205, loss_ctc=88.209, loss_att=69.865, acc=0.628, loss=75.368, backward_time=0.824,
grad_norm=95.380, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.115, optim0_lr0=1.751e-04, train_time=2.747 +[gpua014:0/64] 2023-07-03 02:48:25,715 (trainer:732) INFO: 6epoch:train:801-900batch: iter_time=1.126e-04, forward_time=0.105, loss_ctc=82.181, loss_att=60.909, acc=0.621, loss=67.291, backward_time=0.809, grad_norm=117.747, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.749e-04, train_time=2.885 +[gpua014:0/64] 2023-07-03 02:50:13,828 (trainer:732) INFO: 6epoch:train:901-1000batch: iter_time=1.053e-04, forward_time=0.105, loss_ctc=82.054, loss_att=69.815, acc=0.652, loss=73.486, backward_time=0.763, grad_norm=82.166, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.747e-04, train_time=2.162 +[gpua014:0/64] 2023-07-03 02:50:33,788 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua014:0/64] 2023-07-03 02:50:55,708 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 02:50:59,900 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 02:50:59,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpua014:0/64] 2023-07-03 02:50:59,923 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 02:57:26,461 (trainer:732) INFO: 6epoch:train:1001-1100batch: iter_time=2.656, forward_time=0.185, loss_ctc=94.854, loss_att=79.671, acc=0.629, loss=84.226, backward_time=0.765, grad_norm=116.167, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.115, optim0_lr0=1.745e-04, train_time=8.651 +[gpua014:0/64] 2023-07-03 02:59:06,908 (trainer:732) INFO: 6epoch:train:1101-1200batch: iter_time=1.350e-04, forward_time=0.108, loss_ctc=76.456, loss_att=63.432, acc=0.623, loss=67.339, backward_time=0.749, grad_norm=107.085, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.743e-04, train_time=2.010 +[gpua014:0/64] 2023-07-03 03:00:46,333 (trainer:732) INFO: 6epoch:train:1201-1300batch: iter_time=1.352e-04, forward_time=0.108, loss_ctc=93.595, loss_att=74.995, acc=0.638, loss=80.575, backward_time=0.746, grad_norm=87.098, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.741e-04, train_time=1.988 +[gpua014:0/64] 2023-07-03 03:02:31,926 (trainer:732) INFO: 6epoch:train:1301-1400batch: iter_time=1.261e-04, forward_time=0.107, loss_ctc=84.240, loss_att=65.626, acc=0.628, loss=71.210, backward_time=0.757, grad_norm=81.262, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.739e-04, train_time=2.112 +[gpua014:0/64] 2023-07-03 03:04:16,758 (trainer:732) INFO: 6epoch:train:1401-1500batch: iter_time=1.076e-04, forward_time=0.105, loss_ctc=91.705, loss_att=74.473, acc=0.644, loss=79.643, backward_time=0.764, grad_norm=81.849, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.737e-04, train_time=2.096 +[gpua014:0/64] 2023-07-03 03:05:56,207 (trainer:732) INFO: 6epoch:train:1501-1600batch: 
iter_time=1.166e-04, forward_time=0.105, loss_ctc=88.849, loss_att=73.349, acc=0.644, loss=77.999, backward_time=0.746, grad_norm=82.749, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.734e-04, train_time=1.989 +[gpua014:0/64] 2023-07-03 03:07:46,119 (trainer:732) INFO: 6epoch:train:1601-1700batch: iter_time=1.090e-04, forward_time=0.105, loss_ctc=81.011, loss_att=59.311, acc=0.626, loss=65.821, backward_time=0.765, grad_norm=87.128, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.732e-04, train_time=2.198 +[gpua014:0/64] 2023-07-03 03:09:56,784 (trainer:732) INFO: 6epoch:train:1701-1800batch: iter_time=1.036e-04, forward_time=0.113, loss_ctc=86.103, loss_att=69.824, acc=0.617, loss=74.708, backward_time=0.828, grad_norm=86.823, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.730e-04, train_time=2.613 +[gpua014:0/64] 2023-07-03 03:12:03,429 (trainer:732) INFO: 6epoch:train:1801-1900batch: iter_time=9.738e-05, forward_time=0.139, loss_ctc=79.047, loss_att=59.219, acc=0.618, loss=65.167, backward_time=0.783, grad_norm=83.558, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.113, optim0_lr0=1.728e-04, train_time=2.533 +[gpua014:0/64] 2023-07-03 03:14:11,604 (trainer:732) INFO: 6epoch:train:1901-2000batch: iter_time=9.493e-05, forward_time=0.130, loss_ctc=80.506, loss_att=69.250, acc=0.651, loss=72.627, backward_time=0.780, grad_norm=81.945, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.726e-04, train_time=2.563 +[gpua014:0/64] 2023-07-03 03:14:31,249 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua014:0/64] 2023-07-03 03:14:53,126 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 03:14:57,706 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 03:14:57,706 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpua014:0/64] 2023-07-03 03:14:57,713 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 03:20:32,697 (trainer:732) INFO: 6epoch:train:2001-2100batch: iter_time=2.337, forward_time=0.203, loss_ctc=92.289, loss_att=78.161, acc=0.643, loss=82.399, backward_time=0.769, grad_norm=99.875, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.115, optim0_lr0=1.724e-04, train_time=7.620 +[gpua014:0/64] 2023-07-03 03:22:30,232 (trainer:732) INFO: 6epoch:train:2101-2200batch: iter_time=1.127e-04, forward_time=0.105, loss_ctc=74.755, loss_att=62.473, acc=0.636, loss=66.158, backward_time=0.770, grad_norm=77.549, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.722e-04, train_time=2.352 +[gpua014:0/64] 2023-07-03 03:24:27,935 (trainer:732) INFO: 6epoch:train:2201-2300batch: iter_time=1.054e-04, forward_time=0.107, loss_ctc=93.934, loss_att=72.433, acc=0.645, loss=78.883, backward_time=0.767, grad_norm=109.491, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, 
optim0_lr0=1.720e-04, train_time=2.354 +[gpua014:0/64] 2023-07-03 03:26:25,344 (trainer:732) INFO: 6epoch:train:2301-2400batch: iter_time=1.301e-04, forward_time=0.106, loss_ctc=83.956, loss_att=64.235, acc=0.642, loss=70.151, backward_time=0.796, grad_norm=108.855, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.110, optim0_lr0=1.718e-04, train_time=2.348 +[gpua014:0/64] 2023-07-03 03:28:33,609 (trainer:732) INFO: 6epoch:train:2401-2500batch: iter_time=1.435e-04, forward_time=0.106, loss_ctc=88.720, loss_att=69.805, acc=0.662, loss=75.480, backward_time=0.792, grad_norm=103.908, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.716e-04, train_time=2.565 +[gpua014:0/64] 2023-07-03 03:30:27,664 (trainer:732) INFO: 6epoch:train:2501-2600batch: iter_time=1.432e-04, forward_time=0.106, loss_ctc=88.609, loss_att=72.874, acc=0.650, loss=77.594, backward_time=0.772, grad_norm=86.573, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.714e-04, train_time=2.281 +[gpua014:0/64] 2023-07-03 03:32:26,673 (trainer:732) INFO: 6epoch:train:2601-2700batch: iter_time=1.390e-04, forward_time=0.106, loss_ctc=78.574, loss_att=56.358, acc=0.641, loss=63.022, backward_time=0.785, grad_norm=86.307, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.712e-04, train_time=2.380 +[gpua014:0/64] 2023-07-03 03:34:13,713 (trainer:732) INFO: 6epoch:train:2701-2800batch: iter_time=1.206e-04, forward_time=0.106, loss_ctc=86.005, loss_att=67.629, acc=0.635, loss=73.142, backward_time=0.755, grad_norm=92.662, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.710e-04, train_time=2.141 +[gpua014:0/64] 2023-07-03 03:36:12,217 (trainer:732) INFO: 6epoch:train:2801-2900batch: iter_time=1.203e-04, forward_time=0.107, loss_ctc=77.781, loss_att=57.070, acc=0.637, loss=63.284, backward_time=0.784, grad_norm=81.714, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.708e-04, train_time=2.370 +[gpua014:0/64] 2023-07-03 03:38:12,696 (trainer:732) INFO: 6epoch:train:2901-3000batch: iter_time=1.140e-04, forward_time=0.107, loss_ctc=79.278, loss_att=67.765, acc=0.659, loss=71.219, backward_time=0.784, grad_norm=83.937, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.706e-04, train_time=2.409 +[gpua014:0/64] 2023-07-03 03:38:32,724 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
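The per-100-batch trainer entries above report loss_ctc, loss_att, and a combined loss. The logged values are consistent with a hybrid CTC/attention objective of the form loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3, which a quick check confirms:

```python
# Back out the CTC weight from the logged values (assumption: the
# combined loss is w * loss_ctc + (1 - w) * loss_att).
loss_ctc, loss_att, loss = 100.864, 81.642, 87.408  # 6epoch:train:1-100batch
w = (loss - loss_att) / (loss_ctc - loss_att)
print(round(w, 3))  # 0.3

# Cross-check against 6epoch:train:101-200batch:
print(round(0.3 * 78.378 + 0.7 * 64.090, 3))  # 68.376, logged as 68.377
```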
+[gpua014:0/64] 2023-07-03 03:38:54,889 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 03:38:59,407 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 03:38:59,407 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpua014:0/64] 2023-07-03 03:38:59,414 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 03:47:22,887 (trainer:732) INFO: 6epoch:train:3001-3100batch: iter_time=2.049, forward_time=0.203, loss_ctc=93.250, loss_att=76.037, acc=0.648, loss=81.201, backward_time=0.781, grad_norm=111.928, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.114, optim0_lr0=1.704e-04, train_time=11.003 +[gpua014:0/64] 2023-07-03 03:49:03,103 (trainer:732) INFO: 6epoch:train:3101-3200batch: iter_time=1.112e-04, forward_time=0.108, loss_ctc=74.851, loss_att=59.766, acc=0.647, loss=64.291, backward_time=0.749, grad_norm=100.797, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.702e-04, train_time=2.005 +[gpua014:0/64] 2023-07-03 03:50:42,447 (trainer:732) INFO: 6epoch:train:3201-3300batch: iter_time=1.076e-04, forward_time=0.107, loss_ctc=92.365, loss_att=72.114, acc=0.649, loss=78.189, backward_time=0.746, grad_norm=87.314, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.700e-04, train_time=1.987 +[gpua014:0/64] 2023-07-03 03:52:24,350 (trainer:732) INFO: 6epoch:train:3301-3400batch: iter_time=1.112e-04, forward_time=0.107, loss_ctc=82.555, loss_att=64.473, acc=0.642, loss=69.898, backward_time=0.749, grad_norm=85.977, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.698e-04, train_time=2.038 +[gpua014:0/64] 2023-07-03 03:54:09,308 (trainer:732) INFO: 6epoch:train:3401-3500batch: iter_time=1.150e-04, forward_time=0.108, loss_ctc=87.481, loss_att=69.748, acc=0.663, loss=75.068, backward_time=0.755, grad_norm=83.819, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.696e-04, train_time=2.099 +[gpua014:0/64] 2023-07-03 03:55:58,512 (trainer:732) INFO: 6epoch:train:3501-3600batch: iter_time=1.113e-04, forward_time=0.107, loss_ctc=86.174, loss_att=72.123, acc=0.653, loss=76.338, backward_time=0.758, grad_norm=77.571, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.694e-04, train_time=2.184 +[gpua014:0/64] 2023-07-03 03:57:56,904 (trainer:732) INFO: 6epoch:train:3601-3700batch: iter_time=1.023e-04, forward_time=0.106, loss_ctc=77.503, loss_att=56.289, acc=0.640, loss=62.653, backward_time=0.765, grad_norm=85.979, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.692e-04, train_time=2.368 +[gpua014:0/64] 2023-07-03 03:59:46,927 (trainer:732) INFO: 6epoch:train:3701-3800batch: iter_time=1.034e-04, forward_time=0.107, loss_ctc=85.136, loss_att=69.418, acc=0.636, loss=74.133, backward_time=0.758, grad_norm=91.412, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.690e-04, 
train_time=2.200 +[gpua014:0/64] 2023-07-03 04:01:52,708 (trainer:732) INFO: 6epoch:train:3801-3900batch: iter_time=1.068e-04, forward_time=0.107, loss_ctc=77.495, loss_att=56.241, acc=0.639, loss=62.617, backward_time=0.794, grad_norm=87.115, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.688e-04, train_time=2.515 +[gpua014:0/64] 2023-07-03 04:03:48,873 (trainer:732) INFO: 6epoch:train:3901-4000batch: iter_time=9.811e-05, forward_time=0.107, loss_ctc=80.520, loss_att=67.275, acc=0.662, loss=71.248, backward_time=0.771, grad_norm=76.522, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.686e-04, train_time=2.323 +[gpua014:0/64] 2023-07-03 04:03:51,017 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua014:0/64] 2023-07-03 04:04:13,127 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 04:04:17,367 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 04:04:17,367 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpua014:0/64] 2023-07-03 04:04:17,471 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 04:09:31,917 (trainer:732) INFO: 6epoch:train:4001-4100batch: iter_time=2.215, forward_time=0.186, loss_ctc=88.842, loss_att=76.014, acc=0.651, loss=79.862, backward_time=0.767, grad_norm=105.736, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.116, optim0_lr0=1.684e-04, train_time=6.860 +[gpua014:0/64] 2023-07-03 04:11:11,674 (trainer:732) INFO: 6epoch:train:4101-4200batch: iter_time=9.981e-05, forward_time=0.105, loss_ctc=74.425, loss_att=59.999, acc=0.649, loss=64.327, backward_time=0.746, grad_norm=81.310, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.683e-04, train_time=1.995 +[gpua014:0/64] 2023-07-03 04:12:51,181 (trainer:732) INFO: 6epoch:train:4201-4300batch: iter_time=9.832e-05, forward_time=0.105, loss_ctc=91.340, loss_att=70.328, acc=0.655, loss=76.631, backward_time=0.747, grad_norm=83.428, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.681e-04, train_time=1.990 +[gpua014:0/64] 2023-07-03 04:14:30,517 (trainer:732) INFO: 6epoch:train:4301-4400batch: iter_time=9.483e-05, forward_time=0.105, loss_ctc=83.233, loss_att=64.260, acc=0.643, loss=69.951, backward_time=0.746, grad_norm=100.262, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.679e-04, train_time=1.986 +[gpua014:0/64] 2023-07-03 04:16:09,792 (trainer:732) INFO: 6epoch:train:4401-4500batch: iter_time=9.761e-05, forward_time=0.105, loss_ctc=90.368, loss_att=70.577, acc=0.662, loss=76.514, backward_time=0.746, grad_norm=81.411, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.677e-04, train_time=1.985 +[gpua014:0/64] 2023-07-03 04:18:01,751 (trainer:732) INFO: 6epoch:train:4501-4600batch: iter_time=1.016e-04, forward_time=0.106, loss_ctc=85.446, loss_att=71.129, acc=0.659, loss=75.424, 
backward_time=0.770, grad_norm=86.667, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.675e-04, train_time=2.239 +[gpua014:0/64] 2023-07-03 04:19:41,277 (trainer:732) INFO: 6epoch:train:4601-4700batch: iter_time=9.728e-05, forward_time=0.106, loss_ctc=76.773, loss_att=54.965, acc=0.648, loss=61.508, backward_time=0.746, grad_norm=87.491, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.673e-04, train_time=1.990 +[gpua014:0/64] 2023-07-03 04:21:39,719 (trainer:732) INFO: 6epoch:train:4701-4800batch: iter_time=9.446e-05, forward_time=0.106, loss_ctc=82.772, loss_att=66.380, acc=0.644, loss=71.297, backward_time=0.777, grad_norm=83.858, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.671e-04, train_time=2.369 +[gpua014:0/64] 2023-07-03 04:23:44,102 (trainer:732) INFO: 6epoch:train:4801-4900batch: iter_time=9.355e-05, forward_time=0.106, loss_ctc=77.056, loss_att=56.067, acc=0.641, loss=62.364, backward_time=0.814, grad_norm=84.528, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.669e-04, train_time=2.487 +[gpua014:0/64] 2023-07-03 04:25:47,722 (trainer:732) INFO: 6epoch:train:4901-5000batch: iter_time=0.007, forward_time=0.203, loss_ctc=80.900, loss_att=69.487, acc=0.659, loss=72.911, backward_time=0.814, grad_norm=91.357, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.116, optim0_lr0=1.668e-04, train_time=2.472 +[gpua014:0/64] 2023-07-03 04:26:07,789 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua014:0/64] 2023-07-03 04:26:30,505 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 04:26:34,798 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 04:26:34,799 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpua014:0/64] 2023-07-03 04:26:34,806 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 04:31:31,000 (trainer:732) INFO: 6epoch:train:5001-5100batch: iter_time=2.233, forward_time=0.159, loss_ctc=89.132, loss_att=77.996, acc=0.636, loss=81.337, backward_time=0.767, grad_norm=118.324, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.116, optim0_lr0=1.666e-04, train_time=6.865 +[gpua014:0/64] 2023-07-03 04:33:10,944 (trainer:732) INFO: 6epoch:train:5101-5200batch: iter_time=1.113e-04, forward_time=0.107, loss_ctc=74.757, loss_att=62.723, acc=0.634, loss=66.333, backward_time=0.747, grad_norm=76.771, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.112, optim0_lr0=1.664e-04, train_time=1.999 +[gpua014:0/64] 2023-07-03 04:34:50,322 (trainer:732) INFO: 6epoch:train:5201-5300batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=94.212, loss_att=73.668, acc=0.649, loss=79.831, backward_time=0.746, grad_norm=95.822, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.112, optim0_lr0=1.662e-04, train_time=1.987 +[gpua014:0/64] 2023-07-03 04:36:29,532 (trainer:732) INFO: 
6epoch:train:5301-5400batch: iter_time=1.232e-04, forward_time=0.106, loss_ctc=81.876, loss_att=63.503, acc=0.639, loss=69.015, backward_time=0.745, grad_norm=77.236, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.660e-04, train_time=1.984 +[gpua014:0/64] 2023-07-03 04:38:18,974 (trainer:732) INFO: 6epoch:train:5401-5500batch: iter_time=1.173e-04, forward_time=0.106, loss_ctc=87.849, loss_att=70.680, acc=0.660, loss=75.831, backward_time=0.767, grad_norm=75.704, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.658e-04, train_time=2.189 +[gpua014:0/64] 2023-07-03 04:40:16,088 (trainer:732) INFO: 6epoch:train:5501-5600batch: iter_time=1.242e-04, forward_time=0.106, loss_ctc=85.866, loss_att=70.686, acc=0.652, loss=75.240, backward_time=0.780, grad_norm=90.447, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.657e-04, train_time=2.342 +[gpua014:0/64] 2023-07-03 04:41:55,475 (trainer:732) INFO: 6epoch:train:5601-5700batch: iter_time=1.210e-04, forward_time=0.105, loss_ctc=75.238, loss_att=56.204, acc=0.638, loss=61.914, backward_time=0.745, grad_norm=84.200, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.655e-04, train_time=1.988 +[gpua014:0/64] 2023-07-03 04:43:38,616 (trainer:732) INFO: 6epoch:train:5701-5800batch: iter_time=1.118e-04, forward_time=0.105, loss_ctc=85.090, loss_att=66.747, acc=0.631, loss=72.250, backward_time=0.751, grad_norm=86.670, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.653e-04, train_time=2.063 +[gpua014:0/64] 2023-07-03 04:45:39,837 (trainer:732) INFO: 6epoch:train:5801-5900batch: iter_time=1.146e-04, forward_time=0.106, loss_ctc=75.722, loss_att=55.597, acc=0.634, loss=61.634, backward_time=0.780, grad_norm=82.304, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.651e-04, train_time=2.424 +[gpua014:0/64] 2023-07-03 04:47:39,154 (trainer:732) INFO: 6epoch:train:5901-6000batch: iter_time=1.108e-04, forward_time=0.107, loss_ctc=77.339, loss_att=66.161, acc=0.663, loss=69.514, backward_time=0.769, grad_norm=77.476, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.649e-04, train_time=2.386 +[gpua014:0/64] 2023-07-03 04:47:59,182 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
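For plotting or comparing runs, the trainer:732 lines are straightforward to scrape. A small hypothetical helper (not part of ESPnet) that turns one such line into a dict of floats:

```python
# Hypothetical log scraper (not part of ESPnet): parse one trainer:732
# line into {metric_name: value}.
import re

METRIC = re.compile(r"(\w+)=([0-9.eE+-]+)")

def parse_trainer_line(line: str) -> dict:
    # Everything after "...batch:" is a comma-separated key=value list.
    _, _, tail = line.partition("batch:")
    return {key: float(value) for key, value in METRIC.findall(tail)}

line = ("[gpua014:0/64] 2023-07-03 04:36:29,532 (trainer:732) INFO: "
        "6epoch:train:5301-5400batch: iter_time=1.232e-04, loss_ctc=81.876, "
        "loss_att=63.503, acc=0.639, loss=69.015, train_time=1.984")
print(parse_trainer_line(line)["loss"])  # 69.015
```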
+[gpua014:0/64] 2023-07-03 04:48:21,691 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 04:48:25,979 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 04:48:25,979 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpua014:0/64] 2023-07-03 04:48:25,986 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 04:54:06,958 (trainer:732) INFO: 6epoch:train:6001-6100batch: iter_time=1.850, forward_time=0.163, loss_ctc=87.571, loss_att=75.585, acc=0.643, loss=79.181, backward_time=0.768, grad_norm=97.971, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.113, optim0_lr0=1.648e-04, train_time=7.755 +[gpua014:0/64] 2023-07-03 04:55:46,985 (trainer:732) INFO: 6epoch:train:6101-6200batch: iter_time=9.642e-05, forward_time=0.105, loss_ctc=73.381, loss_att=60.787, acc=0.640, loss=64.565, backward_time=0.747, grad_norm=71.943, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.646e-04, train_time=2.001 +[gpua014:0/64] 2023-07-03 04:57:27,443 (trainer:732) INFO: 6epoch:train:6201-6300batch: iter_time=1.004e-04, forward_time=0.105, loss_ctc=90.091, loss_att=70.809, acc=0.655, loss=76.594, backward_time=0.748, grad_norm=127.042, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.644e-04, train_time=2.009 +[gpua014:0/64] 2023-07-03 04:59:11,615 (trainer:732) INFO: 6epoch:train:6301-6400batch: iter_time=9.925e-05, forward_time=0.105, loss_ctc=82.087, loss_att=62.509, acc=0.641, loss=68.382, backward_time=0.769, grad_norm=88.434, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.642e-04, train_time=2.083 +[gpua014:0/64] 2023-07-03 05:00:54,719 (trainer:732) INFO: 6epoch:train:6401-6500batch: iter_time=1.032e-04, forward_time=0.122, loss_ctc=84.899, loss_att=68.086, acc=0.664, loss=73.129, backward_time=0.755, grad_norm=74.987, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.640e-04, train_time=2.059 +[gpua014:0/64] 2023-07-03 05:02:45,070 (trainer:732) INFO: 6epoch:train:6501-6600batch: iter_time=1.054e-04, forward_time=0.105, loss_ctc=83.659, loss_att=69.016, acc=0.660, loss=73.409, backward_time=0.783, grad_norm=84.921, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.639e-04, train_time=2.210 +[gpua014:0/64] 2023-07-03 05:04:29,809 (trainer:732) INFO: 6epoch:train:6601-6700batch: iter_time=1.032e-04, forward_time=0.105, loss_ctc=74.958, loss_att=54.557, acc=0.644, loss=60.678, backward_time=0.756, grad_norm=102.845, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.637e-04, train_time=2.095 +[gpua014:0/64] 2023-07-03 05:06:13,778 (trainer:732) INFO: 6epoch:train:6701-6800batch: iter_time=1.038e-04, forward_time=0.106, loss_ctc=83.016, loss_att=65.807, acc=0.635, loss=70.970, backward_time=0.759, grad_norm=97.302, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.635e-04, 
train_time=2.079 +[gpua014:0/64] 2023-07-03 05:08:19,430 (trainer:732) INFO: 6epoch:train:6801-6900batch: iter_time=1.233e-04, forward_time=0.122, loss_ctc=77.686, loss_att=56.492, acc=0.635, loss=62.850, backward_time=0.800, grad_norm=93.416, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.114, optim0_lr0=1.633e-04, train_time=2.513 +[gpua014:0/64] 2023-07-03 05:10:18,611 (trainer:732) INFO: 6epoch:train:6901-7000batch: iter_time=1.031e-04, forward_time=0.107, loss_ctc=77.728, loss_att=65.800, acc=0.665, loss=69.378, backward_time=0.783, grad_norm=82.751, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.632e-04, train_time=2.383 +[gpua014:0/64] 2023-07-03 05:10:32,073 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua014:0/64] 2023-07-03 05:10:54,606 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 05:10:58,924 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 05:10:58,925 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpua014:0/64] 2023-07-03 05:10:58,932 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 05:15:48,763 (trainer:732) INFO: 6epoch:train:7001-7100batch: iter_time=2.120, forward_time=0.144, loss_ctc=88.048, loss_att=75.035, acc=0.650, loss=78.939, backward_time=0.768, grad_norm=95.486, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.112, optim0_lr0=1.630e-04, train_time=6.603 +[gpua014:0/64] 2023-07-03 05:17:28,660 (trainer:732) INFO: 6epoch:train:7101-7200batch: iter_time=9.898e-05, forward_time=0.107, loss_ctc=72.504, loss_att=60.062, acc=0.653, loss=63.794, backward_time=0.747, grad_norm=105.157, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.628e-04, train_time=1.998 +[gpua014:0/64] 2023-07-03 05:19:07,845 (trainer:732) INFO: 6epoch:train:7201-7300batch: iter_time=9.656e-05, forward_time=0.106, loss_ctc=91.719, loss_att=69.674, acc=0.659, loss=76.288, backward_time=0.745, grad_norm=85.801, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.626e-04, train_time=1.983 +[gpua014:0/64] 2023-07-03 05:20:47,324 (trainer:732) INFO: 6epoch:train:7301-7400batch: iter_time=9.789e-05, forward_time=0.107, loss_ctc=80.404, loss_att=61.463, acc=0.651, loss=67.146, backward_time=0.747, grad_norm=78.615, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.625e-04, train_time=1.989 +[gpua014:0/64] 2023-07-03 05:22:43,044 (trainer:732) INFO: 6epoch:train:7401-7500batch: iter_time=1.046e-04, forward_time=0.105, loss_ctc=86.992, loss_att=69.038, acc=0.670, loss=74.424, backward_time=0.768, grad_norm=79.027, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.623e-04, train_time=2.314 +[gpua014:0/64] 2023-07-03 05:24:37,024 (trainer:732) INFO: 6epoch:train:7501-7600batch: iter_time=9.948e-05, forward_time=0.106, loss_ctc=83.558, loss_att=70.280, acc=0.662, loss=74.264, 
backward_time=0.780, grad_norm=76.423, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.621e-04, train_time=2.279 +[gpua014:0/64] 2023-07-03 05:26:23,669 (trainer:732) INFO: 6epoch:train:7601-7700batch: iter_time=9.911e-05, forward_time=0.105, loss_ctc=74.621, loss_att=54.975, acc=0.655, loss=60.869, backward_time=0.757, grad_norm=80.254, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.620e-04, train_time=2.133 +[gpua014:0/64] 2023-07-03 05:28:16,661 (trainer:732) INFO: 6epoch:train:7701-7800batch: iter_time=9.433e-05, forward_time=0.106, loss_ctc=85.326, loss_att=66.803, acc=0.645, loss=72.360, backward_time=0.766, grad_norm=105.083, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.618e-04, train_time=2.260 +[gpua014:0/64] 2023-07-03 05:29:55,886 (trainer:732) INFO: 6epoch:train:7801-7900batch: iter_time=9.898e-05, forward_time=0.105, loss_ctc=75.877, loss_att=55.711, acc=0.645, loss=61.761, backward_time=0.745, grad_norm=86.470, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.616e-04, train_time=1.984 +[gpua014:0/64] 2023-07-03 05:31:49,297 (trainer:732) INFO: 6epoch:train:7901-8000batch: iter_time=9.372e-05, forward_time=0.106, loss_ctc=76.947, loss_att=65.617, acc=0.671, loss=69.016, backward_time=0.768, grad_norm=79.259, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.615e-04, train_time=2.268 +[gpua014:0/64] 2023-07-03 05:31:51,126 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua014:0/64] 2023-07-03 05:32:13,485 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 05:32:17,730 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 05:32:17,730 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpua014:0/64] 2023-07-03 05:32:17,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 05:37:41,373 (trainer:732) INFO: 6epoch:train:8001-8100batch: iter_time=1.653, forward_time=0.142, loss_ctc=89.665, loss_att=75.977, acc=0.646, loss=80.083, backward_time=0.771, grad_norm=88.148, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.613e-04, train_time=7.041 +[gpua014:0/64] 2023-07-03 05:39:38,281 (trainer:732) INFO: 6epoch:train:8101-8200batch: iter_time=9.604e-05, forward_time=0.105, loss_ctc=73.797, loss_att=60.863, acc=0.641, loss=64.743, backward_time=0.781, grad_norm=77.873, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.611e-04, train_time=2.338 +[gpua014:0/64] 2023-07-03 05:41:36,301 (trainer:732) INFO: 6epoch:train:8201-8300batch: iter_time=1.047e-04, forward_time=0.106, loss_ctc=89.977, loss_att=71.057, acc=0.654, loss=76.733, backward_time=0.780, grad_norm=87.163, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.610e-04, train_time=2.360 +[gpua014:0/64] 2023-07-03 05:43:33,779 (trainer:732) INFO: 
6epoch:train:8301-8400batch: iter_time=9.713e-05, forward_time=0.107, loss_ctc=81.141, loss_att=62.409, acc=0.643, loss=68.028, backward_time=0.780, grad_norm=79.842, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.608e-04, train_time=2.349 +[gpua014:0/64] 2023-07-03 05:45:32,539 (trainer:732) INFO: 6epoch:train:8401-8500batch: iter_time=9.466e-05, forward_time=0.106, loss_ctc=86.755, loss_att=69.796, acc=0.661, loss=74.884, backward_time=0.801, grad_norm=82.255, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.606e-04, train_time=2.375 +[gpua014:0/64] 2023-07-03 05:47:25,967 (trainer:732) INFO: 6epoch:train:8501-8600batch: iter_time=9.814e-05, forward_time=0.106, loss_ctc=84.611, loss_att=69.021, acc=0.659, loss=73.698, backward_time=0.777, grad_norm=88.656, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.605e-04, train_time=2.268 +[gpua014:0/64] 2023-07-03 05:49:19,536 (trainer:732) INFO: 6epoch:train:8601-8700batch: iter_time=9.950e-05, forward_time=0.106, loss_ctc=72.996, loss_att=53.800, acc=0.651, loss=59.559, backward_time=0.784, grad_norm=82.001, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.603e-04, train_time=2.271 +[gpua014:0/64] 2023-07-03 05:51:23,335 (trainer:732) INFO: 6epoch:train:8701-8800batch: iter_time=9.708e-05, forward_time=0.106, loss_ctc=83.993, loss_att=66.774, acc=0.636, loss=71.940, backward_time=0.827, grad_norm=106.452, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.601e-04, train_time=2.476 +[gpua014:0/64] 2023-07-03 05:53:17,393 (trainer:732) INFO: 6epoch:train:8801-8900batch: iter_time=9.769e-05, forward_time=0.106, loss_ctc=77.297, loss_att=55.638, acc=0.636, loss=62.136, backward_time=0.764, grad_norm=84.563, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.600e-04, train_time=2.281 +[gpua014:0/64] 2023-07-03 05:55:39,873 (trainer:732) INFO: 6epoch:train:8901-9000batch: iter_time=5.865e-04, forward_time=0.129, loss_ctc=78.944, loss_att=65.397, acc=0.665, loss=69.461, backward_time=0.808, grad_norm=87.114, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.116, optim0_lr0=1.598e-04, train_time=2.849 +[gpua014:0/64] 2023-07-03 05:55:59,938 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
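Each "Building Nth iter-factory" block above loads one shard of the splits10 training set (note the non-sequential order: split.8, split.1, split.7, split.5, ...), builds a fresh dataset and batch sampler for it, and trains on it before moving on. A schematic sketch of that pattern, with `load_split` as a hypothetical stand-in for the real shard loader in espnet2's multiple_iter_factory:

```python
# Schematic of the multiple-iterator pattern in this log (simplified;
# the real logic lives in espnet2's multiple_iter_factory).
import random
from typing import Iterator, List

def load_split(i: int) -> List[str]:
    # Hypothetical stand-in for reading splits10/<name>/split.<i>.
    return [f"utt_{i}_{j}" for j in range(128)]

def epoch(num_splits: int = 10, seed: int = 0) -> Iterator[List[str]]:
    order = list(range(num_splits))
    random.Random(seed).shuffle(order)  # shard order varies with the seed
    for i in order:                     # one "Building ith iter-factory" each
        yield load_split(i)

for shard in epoch():
    pass  # each shard is then batched (cf. UnsortedBatchSampler above)
```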
+[gpua014:0/64] 2023-07-03 05:56:22,526 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 05:56:26,824 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 05:56:26,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpua014:0/64] 2023-07-03 05:56:26,855 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 06:03:52,584 (trainer:732) INFO: 6epoch:train:9001-9100batch: iter_time=2.073, forward_time=0.141, loss_ctc=88.284, loss_att=74.413, acc=0.652, loss=78.574, backward_time=0.768, grad_norm=114.120, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.596e-04, train_time=9.853 +[gpua014:0/64] 2023-07-03 06:05:32,019 (trainer:732) INFO: 6epoch:train:9101-9200batch: iter_time=1.009e-04, forward_time=0.105, loss_ctc=72.526, loss_att=60.398, acc=0.642, loss=64.036, backward_time=0.747, grad_norm=88.042, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.595e-04, train_time=1.989 +[gpua014:0/64] 2023-07-03 06:07:17,745 (trainer:732) INFO: 6epoch:train:9201-9300batch: iter_time=9.225e-05, forward_time=0.106, loss_ctc=88.224, loss_att=68.416, acc=0.662, loss=74.359, backward_time=0.758, grad_norm=83.370, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.593e-04, train_time=2.114 +[gpua014:0/64] 2023-07-03 06:09:06,317 (trainer:732) INFO: 6epoch:train:9301-9400batch: iter_time=1.235e-04, forward_time=0.113, loss_ctc=79.435, loss_att=61.439, acc=0.647, loss=66.838, backward_time=0.769, grad_norm=85.096, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.591e-04, train_time=2.171 +[gpua014:0/64] 2023-07-03 06:10:51,994 (trainer:732) INFO: 6epoch:train:9401-9500batch: iter_time=1.235e-04, forward_time=0.134, loss_ctc=85.244, loss_att=67.940, acc=0.667, loss=73.131, backward_time=0.759, grad_norm=77.725, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.118, optim0_lr0=1.590e-04, train_time=2.113 +[gpua014:0/64] 2023-07-03 06:12:33,563 (trainer:732) INFO: 6epoch:train:9501-9600batch: iter_time=1.212e-04, forward_time=0.116, loss_ctc=83.273, loss_att=68.461, acc=0.664, loss=72.904, backward_time=0.747, grad_norm=79.840, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.110, optim0_lr0=1.588e-04, train_time=2.031 +[gpua014:0/64] 2023-07-03 06:14:26,421 (trainer:732) INFO: 6epoch:train:9601-9700batch: iter_time=1.196e-04, forward_time=0.107, loss_ctc=73.391, loss_att=53.716, acc=0.650, loss=59.619, backward_time=0.761, grad_norm=81.447, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.110, optim0_lr0=1.587e-04, train_time=2.257 +[gpua014:0/64] 2023-07-03 06:16:27,245 (trainer:732) INFO: 6epoch:train:9701-9800batch: iter_time=2.269e-04, forward_time=0.135, loss_ctc=83.341, loss_att=65.480, acc=0.639, loss=70.839, backward_time=0.769, grad_norm=86.702, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.585e-04, 
train_time=2.416 +[gpua014:0/64] 2023-07-03 06:18:38,458 (trainer:732) INFO: 6epoch:train:9801-9900batch: iter_time=1.187e-04, forward_time=0.105, loss_ctc=75.430, loss_att=54.355, acc=0.644, loss=60.678, backward_time=0.817, grad_norm=86.330, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.110, optim0_lr0=1.583e-04, train_time=2.624 +[gpua014:0/64] 2023-07-03 06:20:41,062 (trainer:732) INFO: 6epoch:train:9901-10000batch: iter_time=1.247e-04, forward_time=0.105, loss_ctc=78.271, loss_att=65.479, acc=0.666, loss=69.316, backward_time=0.787, grad_norm=81.913, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.110, optim0_lr0=1.582e-04, train_time=2.451 +[gpua014:0/64] 2023-07-03 06:32:34,851 (trainer:338) INFO: 6epoch results: [train] iter_time=0.207, forward_time=0.117, loss_ctc=83.321, loss_att=66.284, acc=0.646, loss=71.395, backward_time=0.769, grad_norm=89.618, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.112, optim0_lr0=1.669e-04, train_time=2.823, time=3 hours, 55 minutes and 37.95 seconds, total_count=30000, gpu_max_cached_mem_GB=34.184, [valid] loss_ctc=65.053, cer_ctc=0.342, loss_att=52.110, acc=0.582, cer=0.475, wer=1.000, loss=55.993, time=5 minutes and 34.4 seconds, total_count=3542, gpu_max_cached_mem_GB=37.479, [att_plot] time=5 minutes and 55.66 seconds, total_count=0, gpu_max_cached_mem_GB=37.479 +[gpua014:0/64] 2023-07-03 06:32:53,824 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua014:0/64] 2023-07-03 06:32:53,855 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/1epoch.pth +[gpua014:0/64] 2023-07-03 06:32:53,943 (trainer:272) INFO: 7/100epoch started. Estimated time to finish: 2 weeks, 2 days and 3 hours +[gpua014:0/64] 2023-07-03 06:32:55,418 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
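[Editor's note] The reported loss is a fixed blend of the CTC and attention branches. The training config is not reproduced in this log, but every logged (loss_ctc, loss_att, loss) triple is consistent with ctc_weight = 0.3 (an inference from the numbers, not a quoted setting); checking it against the 6epoch [train] summary above:

```python
# Hybrid CTC/attention objective: loss = w * loss_ctc + (1 - w) * loss_att,
# with w = 0.3 inferred from the logged values (the config is not in this log).
w = 0.3
loss_ctc, loss_att = 83.321, 66.284     # 6epoch [train] summary above
print(round(w * loss_ctc + (1 - w) * loss_att, 3))   # 71.395, as logged
```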
+[gpua014:0/64] 2023-07-03 06:33:18,973 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 06:33:23,949 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 06:33:23,949 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpua014:0/64] 2023-07-03 06:33:24,009 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 06:46:45,718 (trainer:732) INFO: 7epoch:train:1-100batch: iter_time=7.202, forward_time=0.177, loss_ctc=95.276, loss_att=81.548, acc=0.618, loss=85.666, backward_time=0.772, grad_norm=101.106, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.115, optim0_lr0=1.580e-04, train_time=16.619 +[gpua014:0/64] 2023-07-03 06:48:26,044 (trainer:732) INFO: 7epoch:train:101-200batch: iter_time=1.246e-04, forward_time=0.108, loss_ctc=78.282, loss_att=64.557, acc=0.642, loss=68.675, backward_time=0.755, grad_norm=81.816, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.579e-04, train_time=2.006 +[gpua014:0/64] 2023-07-03 06:50:06,508 (trainer:732) INFO: 7epoch:train:201-300batch: iter_time=1.371e-04, forward_time=0.109, loss_ctc=98.448, loss_att=70.985, acc=0.654, loss=79.224, backward_time=0.755, grad_norm=111.819, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.577e-04, train_time=2.009 +[gpua014:0/64] 2023-07-03 06:51:50,760 (trainer:732) INFO: 7epoch:train:301-400batch: iter_time=3.589e-04, forward_time=0.117, loss_ctc=68.770, loss_att=57.090, acc=0.637, loss=60.594, backward_time=0.758, grad_norm=72.128, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.114, optim0_lr0=1.576e-04, train_time=2.083 +[gpua014:0/64] 2023-07-03 06:53:47,407 (trainer:732) INFO: 7epoch:train:401-500batch: iter_time=1.296e-04, forward_time=0.219, loss_ctc=73.520, loss_att=63.518, acc=0.630, loss=66.518, backward_time=0.767, grad_norm=81.990, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.152, optim0_lr0=1.574e-04, train_time=2.334 +[gpua014:0/64] 2023-07-03 06:55:27,528 (trainer:732) INFO: 7epoch:train:501-600batch: iter_time=1.308e-04, forward_time=0.111, loss_ctc=93.523, loss_att=72.063, acc=0.620, loss=78.501, backward_time=0.751, grad_norm=104.469, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.572e-04, train_time=2.002 +[gpua014:0/64] 2023-07-03 06:57:26,998 (trainer:732) INFO: 7epoch:train:601-700batch: iter_time=9.820e-05, forward_time=0.108, loss_ctc=85.199, loss_att=67.503, acc=0.642, loss=72.812, backward_time=0.817, grad_norm=89.793, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.571e-04, train_time=2.389 +[gpua014:0/64] 2023-07-03 06:59:11,942 (trainer:732) INFO: 7epoch:train:701-800batch: iter_time=9.532e-05, forward_time=0.106, loss_ctc=75.795, loss_att=59.124, acc=0.652, loss=64.125, backward_time=0.761, grad_norm=72.838, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.569e-04, train_time=2.099 
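[Editor's note] optim0_lr0 decays smoothly from ~1.608e-04 toward ~1.444e-04 across these two epochs, matching a warmup-then-inverse-square-root schedule with the peak LR 2.5e-4 and 10k warmup steps named in the experiment directory. A sketch of that schedule; the step indices below are inferred from the logged LR values themselves, since the mapping from logged batch counts to optimizer steps (gradient accumulation, resume offset) is not shown in this excerpt:

```python
import math

def warmup_lr(step, peak=2.5e-4, warmup=10000):
    # Linear warmup to `peak`, then inverse-square-root decay
    # (peak and warmup taken from the experiment name: lr2.5e-4, warmup10k).
    return peak * min(step / warmup, math.sqrt(warmup / step))

print(f"{warmup_lr(25000):.3e}")   # 1.581e-04 ~ optim0_lr0 at the end of epoch 6
print(f"{warmup_lr(30000):.3e}")   # 1.443e-04 ~ optim0_lr0 at the end of epoch 7
```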
+[gpua014:0/64] 2023-07-03 07:01:05,504 (trainer:732) INFO: 7epoch:train:801-900batch: iter_time=9.391e-05, forward_time=0.106, loss_ctc=80.987, loss_att=61.948, acc=0.629, loss=67.660, backward_time=0.771, grad_norm=169.450, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.568e-04, train_time=2.271 +[gpua014:0/64] 2023-07-03 07:02:59,172 (trainer:732) INFO: 7epoch:train:901-1000batch: iter_time=9.789e-05, forward_time=0.107, loss_ctc=94.412, loss_att=71.680, acc=0.634, loss=78.499, backward_time=0.781, grad_norm=87.757, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.566e-04, train_time=2.273 +[gpua014:0/64] 2023-07-03 07:03:13,496 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua014:0/64] 2023-07-03 07:03:35,626 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 07:03:40,087 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 07:03:40,087 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpua014:0/64] 2023-07-03 07:03:40,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 07:11:32,069 (trainer:732) INFO: 7epoch:train:1001-1100batch: iter_time=2.149, forward_time=0.164, loss_ctc=91.941, loss_att=80.290, acc=0.624, loss=83.785, backward_time=0.772, grad_norm=101.163, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.114, optim0_lr0=1.565e-04, train_time=10.258 +[gpua014:0/64] 2023-07-03 07:13:18,931 (trainer:732) INFO: 7epoch:train:1101-1200batch: iter_time=1.172e-04, forward_time=0.107, loss_ctc=79.552, loss_att=63.497, acc=0.642, loss=68.313, backward_time=0.757, grad_norm=87.614, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.563e-04, train_time=2.137 +[gpua014:0/64] 2023-07-03 07:14:58,837 (trainer:732) INFO: 7epoch:train:1201-1300batch: iter_time=1.165e-04, forward_time=0.107, loss_ctc=95.280, loss_att=67.963, acc=0.658, loss=76.158, backward_time=0.752, grad_norm=166.924, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.562e-04, train_time=1.998 +[gpua014:0/64] 2023-07-03 07:16:47,297 (trainer:732) INFO: 7epoch:train:1301-1400batch: iter_time=1.161e-04, forward_time=0.106, loss_ctc=68.366, loss_att=56.026, acc=0.640, loss=59.728, backward_time=0.771, grad_norm=76.276, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.560e-04, train_time=2.169 +[gpua014:0/64] 2023-07-03 07:18:37,930 (trainer:732) INFO: 7epoch:train:1401-1500batch: iter_time=1.157e-04, forward_time=0.106, loss_ctc=72.529, loss_att=61.471, acc=0.638, loss=64.789, backward_time=0.784, grad_norm=79.624, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.559e-04, train_time=2.212 +[gpua014:0/64] 2023-07-03 07:20:20,428 (trainer:732) INFO: 7epoch:train:1501-1600batch: iter_time=1.275e-04, forward_time=0.107, loss_ctc=92.853, loss_att=71.159, acc=0.623, loss=77.667, backward_time=0.755, 
grad_norm=97.225, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.557e-04, train_time=2.050 +[gpua014:0/64] 2023-07-03 07:22:06,737 (trainer:732) INFO: 7epoch:train:1601-1700batch: iter_time=1.174e-04, forward_time=0.107, loss_ctc=82.892, loss_att=64.823, acc=0.651, loss=70.244, backward_time=0.761, grad_norm=102.633, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.556e-04, train_time=2.126 +[gpua014:0/64] 2023-07-03 07:24:10,142 (trainer:732) INFO: 7epoch:train:1701-1800batch: iter_time=1.456e-04, forward_time=0.106, loss_ctc=74.365, loss_att=57.403, acc=0.655, loss=62.492, backward_time=0.797, grad_norm=72.839, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.554e-04, train_time=2.468 +[gpua014:0/64] 2023-07-03 07:26:14,745 (trainer:732) INFO: 7epoch:train:1801-1900batch: iter_time=1.130e-04, forward_time=0.107, loss_ctc=76.773, loss_att=58.806, acc=0.640, loss=64.196, backward_time=0.803, grad_norm=78.057, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.553e-04, train_time=2.492 +[gpua014:0/64] 2023-07-03 07:28:26,877 (trainer:732) INFO: 7epoch:train:1901-2000batch: iter_time=1.142e-04, forward_time=0.107, loss_ctc=93.969, loss_att=70.125, acc=0.636, loss=77.279, backward_time=0.796, grad_norm=91.970, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.551e-04, train_time=2.642 +[gpua014:0/64] 2023-07-03 07:28:46,906 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua014:0/64] 2023-07-03 07:29:09,281 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 07:29:13,884 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 07:29:13,884 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpua014:0/64] 2023-07-03 07:29:13,892 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 07:34:52,459 (trainer:732) INFO: 7epoch:train:2001-2100batch: iter_time=2.218, forward_time=0.170, loss_ctc=90.821, loss_att=77.154, acc=0.640, loss=81.254, backward_time=0.770, grad_norm=94.828, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.114, optim0_lr0=1.550e-04, train_time=7.711 +[gpua014:0/64] 2023-07-03 07:36:51,476 (trainer:732) INFO: 7epoch:train:2101-2200batch: iter_time=1.022e-04, forward_time=0.107, loss_ctc=77.592, loss_att=64.767, acc=0.657, loss=68.614, backward_time=0.798, grad_norm=91.890, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.548e-04, train_time=2.380 +[gpua014:0/64] 2023-07-03 07:38:47,649 (trainer:732) INFO: 7epoch:train:2201-2300batch: iter_time=1.175e-04, forward_time=0.108, loss_ctc=93.080, loss_att=64.886, acc=0.669, loss=73.345, backward_time=0.796, grad_norm=102.296, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.547e-04, train_time=2.323 +[gpua014:0/64] 2023-07-03 07:40:46,777 (trainer:732) INFO: 7epoch:train:2301-2400batch: 
iter_time=1.174e-04, forward_time=0.107, loss_ctc=69.893, loss_att=57.571, acc=0.647, loss=61.267, backward_time=0.774, grad_norm=72.489, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.545e-04, train_time=2.382 +[gpua014:0/64] 2023-07-03 07:42:36,668 (trainer:732) INFO: 7epoch:train:2401-2500batch: iter_time=1.217e-04, forward_time=0.108, loss_ctc=71.496, loss_att=60.242, acc=0.652, loss=63.618, backward_time=0.765, grad_norm=87.366, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.544e-04, train_time=2.198 +[gpua014:0/64] 2023-07-03 07:44:33,450 (trainer:732) INFO: 7epoch:train:2501-2600batch: iter_time=1.202e-04, forward_time=0.107, loss_ctc=93.104, loss_att=71.943, acc=0.630, loss=78.291, backward_time=0.779, grad_norm=117.869, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.542e-04, train_time=2.335 +[gpua014:0/64] 2023-07-03 07:46:44,195 (trainer:732) INFO: 7epoch:train:2601-2700batch: iter_time=1.161e-04, forward_time=0.108, loss_ctc=82.752, loss_att=66.339, acc=0.656, loss=71.263, backward_time=0.798, grad_norm=94.956, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.541e-04, train_time=2.615 +[gpua014:0/64] 2023-07-03 07:49:06,512 (trainer:732) INFO: 7epoch:train:2701-2800batch: iter_time=1.162e-04, forward_time=0.107, loss_ctc=76.092, loss_att=57.947, acc=0.662, loss=63.391, backward_time=0.854, grad_norm=96.852, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.539e-04, train_time=2.846 +[gpua014:0/64] 2023-07-03 07:51:15,092 (trainer:732) INFO: 7epoch:train:2801-2900batch: iter_time=1.231e-04, forward_time=0.107, loss_ctc=77.220, loss_att=60.708, acc=0.646, loss=65.661, backward_time=0.780, grad_norm=79.761, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.538e-04, train_time=2.571 +[gpua014:0/64] 2023-07-03 07:53:30,532 (trainer:732) INFO: 7epoch:train:2901-3000batch: iter_time=1.155e-04, forward_time=0.107, loss_ctc=92.640, loss_att=69.053, acc=0.651, loss=76.129, backward_time=0.817, grad_norm=105.478, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.536e-04, train_time=2.709 +[gpua014:0/64] 2023-07-03 07:53:32,581 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
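[Editor's note] loss_scale doubles at regular intervals in this log (2.684e+08 -> 5.369e+08 at 7epoch batch ~2001, then 1.074e+09, 2.147e+09, 4.295e+09), each jump roughly 4000 batches apart. That is the signature of dynamic loss scaling under mixed precision: PyTorch's GradScaler, for instance, multiplies the scale by growth_factor=2.0 after every growth_interval=2000 optimizer steps without inf/NaN gradients, which lines up with ~4000 batches here if two batches are accumulated per step (an inference, as above). A toy reproduction of that bookkeeping:

```python
# Toy reproduction of the loss_scale trajectory, assuming GradScaler-style
# dynamics (double the scale every 2000 consecutive overflow-free steps).
scale, growth_interval, good_steps = 2.684e8, 2000, 0
for _ in range(6000):                    # ~6000 optimizer steps, no overflow
    good_steps += 1                      # a real scaler would instead halve
    if good_steps == growth_interval:    # `scale` on an inf/NaN gradient
        scale *= 2.0
        good_steps = 0
print(f"{scale:.3e}")                    # 2.147e+09, the value seen early in epoch 8
```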
+[gpua014:0/64] 2023-07-03 07:53:54,651 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 07:53:59,259 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 07:53:59,259 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpua014:0/64] 2023-07-03 07:53:59,267 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 08:01:50,181 (trainer:732) INFO: 7epoch:train:3001-3100batch: iter_time=1.542, forward_time=0.157, loss_ctc=89.125, loss_att=75.893, acc=0.641, loss=79.863, backward_time=0.770, grad_norm=101.349, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.115, optim0_lr0=1.535e-04, train_time=9.993 +[gpua014:0/64] 2023-07-03 08:03:31,460 (trainer:732) INFO: 7epoch:train:3101-3200batch: iter_time=1.089e-04, forward_time=0.109, loss_ctc=75.488, loss_att=61.887, acc=0.663, loss=65.968, backward_time=0.756, grad_norm=75.186, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.534e-04, train_time=2.025 +[gpua014:0/64] 2023-07-03 08:05:29,565 (trainer:732) INFO: 7epoch:train:3201-3300batch: iter_time=1.154e-04, forward_time=0.108, loss_ctc=93.877, loss_att=64.871, acc=0.673, loss=73.573, backward_time=0.778, grad_norm=102.635, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.532e-04, train_time=2.362 +[gpua014:0/64] 2023-07-03 08:07:28,930 (trainer:732) INFO: 7epoch:train:3301-3400batch: iter_time=1.155e-04, forward_time=0.107, loss_ctc=67.271, loss_att=55.343, acc=0.658, loss=58.921, backward_time=0.803, grad_norm=76.950, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.531e-04, train_time=2.387 +[gpua014:0/64] 2023-07-03 08:09:13,964 (trainer:732) INFO: 7epoch:train:3401-3500batch: iter_time=1.163e-04, forward_time=0.107, loss_ctc=70.710, loss_att=59.768, acc=0.654, loss=63.051, backward_time=0.755, grad_norm=75.149, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.529e-04, train_time=2.100 +[gpua014:0/64] 2023-07-03 08:11:10,680 (trainer:732) INFO: 7epoch:train:3501-3600batch: iter_time=1.144e-04, forward_time=0.108, loss_ctc=89.658, loss_att=70.387, acc=0.636, loss=76.168, backward_time=0.767, grad_norm=89.805, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.528e-04, train_time=2.334 +[gpua014:0/64] 2023-07-03 08:13:17,684 (trainer:732) INFO: 7epoch:train:3601-3700batch: iter_time=6.304e-04, forward_time=0.165, loss_ctc=82.508, loss_att=65.475, acc=0.661, loss=70.585, backward_time=0.819, grad_norm=78.875, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.118, optim0_lr0=1.526e-04, train_time=2.539 +[gpua014:0/64] 2023-07-03 08:15:30,212 (trainer:732) INFO: 7epoch:train:3701-3800batch: iter_time=1.124e-04, forward_time=0.109, loss_ctc=72.864, loss_att=56.506, acc=0.669, loss=61.413, backward_time=0.808, grad_norm=70.706, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.525e-04, 
train_time=2.651 +[gpua014:0/64] 2023-07-03 08:17:47,257 (trainer:732) INFO: 7epoch:train:3801-3900batch: iter_time=1.222e-04, forward_time=0.107, loss_ctc=77.452, loss_att=59.300, acc=0.648, loss=64.746, backward_time=0.810, grad_norm=91.894, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.524e-04, train_time=2.741 +[gpua014:0/64] 2023-07-03 08:19:44,368 (trainer:732) INFO: 7epoch:train:3901-4000batch: iter_time=1.179e-04, forward_time=0.108, loss_ctc=91.937, loss_att=68.717, acc=0.652, loss=75.683, backward_time=0.778, grad_norm=90.949, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.522e-04, train_time=2.342 +[gpua014:0/64] 2023-07-03 08:19:59,462 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua014:0/64] 2023-07-03 08:20:21,953 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 08:20:26,425 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 08:20:26,425 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpua014:0/64] 2023-07-03 08:20:26,432 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 08:28:58,296 (trainer:732) INFO: 7epoch:train:4001-4100batch: iter_time=2.615, forward_time=0.108, loss_ctc=87.423, loss_att=75.461, acc=0.635, loss=79.049, backward_time=0.770, grad_norm=94.678, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.521e-04, train_time=11.078 +[gpua014:0/64] 2023-07-03 08:30:38,954 (trainer:732) INFO: 7epoch:train:4101-4200batch: iter_time=1.380e-04, forward_time=0.108, loss_ctc=75.872, loss_att=62.613, acc=0.654, loss=66.591, backward_time=0.755, grad_norm=82.364, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.519e-04, train_time=2.013 +[gpua014:0/64] 2023-07-03 08:32:19,075 (trainer:732) INFO: 7epoch:train:4201-4300batch: iter_time=1.023e-04, forward_time=0.107, loss_ctc=94.042, loss_att=66.149, acc=0.664, loss=74.517, backward_time=0.753, grad_norm=117.332, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.518e-04, train_time=2.002 +[gpua014:0/64] 2023-07-03 08:34:01,605 (trainer:732) INFO: 7epoch:train:4301-4400batch: iter_time=9.928e-05, forward_time=0.107, loss_ctc=66.175, loss_att=55.333, acc=0.650, loss=58.585, backward_time=0.759, grad_norm=81.191, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.517e-04, train_time=2.050 +[gpua014:0/64] 2023-07-03 08:36:00,009 (trainer:732) INFO: 7epoch:train:4401-4500batch: iter_time=1.002e-04, forward_time=0.106, loss_ctc=70.262, loss_att=59.536, acc=0.653, loss=62.754, backward_time=0.774, grad_norm=75.645, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.515e-04, train_time=2.368 +[gpua014:0/64] 2023-07-03 08:38:05,032 (trainer:732) INFO: 7epoch:train:4501-4600batch: iter_time=1.049e-04, forward_time=0.106, loss_ctc=89.583, loss_att=69.758, acc=0.629, loss=75.705, 
backward_time=0.786, grad_norm=99.400, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.514e-04, train_time=2.500 +[gpua014:0/64] 2023-07-03 08:39:50,778 (trainer:732) INFO: 7epoch:train:4601-4700batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=84.138, loss_att=65.587, acc=0.655, loss=71.152, backward_time=0.765, grad_norm=92.075, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.512e-04, train_time=2.115 +[gpua014:0/64] 2023-07-03 08:41:40,638 (trainer:732) INFO: 7epoch:train:4701-4800batch: iter_time=1.006e-04, forward_time=0.106, loss_ctc=73.197, loss_att=56.452, acc=0.665, loss=61.475, backward_time=0.766, grad_norm=80.960, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.511e-04, train_time=2.197 +[gpua014:0/64] 2023-07-03 08:43:38,356 (trainer:732) INFO: 7epoch:train:4801-4900batch: iter_time=9.723e-05, forward_time=0.106, loss_ctc=76.829, loss_att=59.725, acc=0.640, loss=64.856, backward_time=0.772, grad_norm=83.461, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.510e-04, train_time=2.354 +[gpua014:0/64] 2023-07-03 08:45:27,134 (trainer:732) INFO: 7epoch:train:4901-5000batch: iter_time=9.801e-05, forward_time=0.107, loss_ctc=91.616, loss_att=68.958, acc=0.640, loss=75.755, backward_time=0.761, grad_norm=96.348, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.508e-04, train_time=2.175 +[gpua014:0/64] 2023-07-03 08:45:28,909 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua014:0/64] 2023-07-03 08:45:51,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 08:45:55,336 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 08:45:55,336 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpua014:0/64] 2023-07-03 08:45:55,343 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 08:51:32,941 (trainer:732) INFO: 7epoch:train:5001-5100batch: iter_time=1.602, forward_time=0.172, loss_ctc=88.036, loss_att=75.518, acc=0.648, loss=79.274, backward_time=0.778, grad_norm=136.654, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.115, optim0_lr0=1.507e-04, train_time=7.316 +[gpua014:0/64] 2023-07-03 08:53:29,322 (trainer:732) INFO: 7epoch:train:5101-5200batch: iter_time=1.157e-04, forward_time=0.107, loss_ctc=76.596, loss_att=62.630, acc=0.665, loss=66.820, backward_time=0.782, grad_norm=85.789, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.505e-04, train_time=2.328 +[gpua014:0/64] 2023-07-03 08:55:20,487 (trainer:732) INFO: 7epoch:train:5201-5300batch: iter_time=1.116e-04, forward_time=0.107, loss_ctc=93.002, loss_att=64.591, acc=0.674, loss=73.115, backward_time=0.767, grad_norm=100.095, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.504e-04, train_time=2.223 +[gpua014:0/64] 2023-07-03 08:57:20,388 (trainer:732) INFO: 
7epoch:train:5301-5400batch: iter_time=1.073e-04, forward_time=0.108, loss_ctc=66.339, loss_att=55.811, acc=0.659, loss=58.970, backward_time=0.795, grad_norm=74.518, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.503e-04, train_time=2.398 +[gpua014:0/64] 2023-07-03 08:59:11,806 (trainer:732) INFO: 7epoch:train:5401-5500batch: iter_time=1.043e-04, forward_time=0.109, loss_ctc=70.391, loss_att=58.922, acc=0.662, loss=62.363, backward_time=0.776, grad_norm=68.908, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.501e-04, train_time=2.228 +[gpua014:0/64] 2023-07-03 09:01:07,659 (trainer:732) INFO: 7epoch:train:5501-5600batch: iter_time=1.011e-04, forward_time=0.108, loss_ctc=89.550, loss_att=68.562, acc=0.642, loss=74.858, backward_time=0.780, grad_norm=93.115, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.500e-04, train_time=2.317 +[gpua014:0/64] 2023-07-03 09:03:00,602 (trainer:732) INFO: 7epoch:train:5601-5700batch: iter_time=1.080e-04, forward_time=0.108, loss_ctc=82.291, loss_att=65.123, acc=0.662, loss=70.273, backward_time=0.771, grad_norm=79.031, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.499e-04, train_time=2.259 +[gpua014:0/64] 2023-07-03 09:05:32,666 (trainer:732) INFO: 7epoch:train:5701-5800batch: iter_time=1.152e-04, forward_time=0.107, loss_ctc=73.361, loss_att=55.695, acc=0.672, loss=60.995, backward_time=0.856, grad_norm=71.120, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.497e-04, train_time=3.041 +[gpua014:0/64] 2023-07-03 09:07:52,843 (trainer:732) INFO: 7epoch:train:5801-5900batch: iter_time=1.130e-04, forward_time=0.107, loss_ctc=76.767, loss_att=58.968, acc=0.652, loss=64.308, backward_time=0.811, grad_norm=80.975, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.496e-04, train_time=2.803 +[gpua014:0/64] 2023-07-03 09:09:55,021 (trainer:732) INFO: 7epoch:train:5901-6000batch: iter_time=1.111e-04, forward_time=0.107, loss_ctc=90.310, loss_att=66.589, acc=0.657, loss=73.705, backward_time=0.785, grad_norm=83.293, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.495e-04, train_time=2.443 +[gpua014:0/64] 2023-07-03 09:10:15,050 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
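[Editor's note] Every block logs clip=100.000 next to grad_norm, and occasional blocks report grad_norm well above it (169.450 at 7epoch:train:801-900, 136.654 at 7epoch:train:5001-5100). This is the usual max-norm clipping pattern: the logged grad_norm is plausibly the pre-clipping global norm, which is what torch.nn.utils.clip_grad_norm_ returns (an assumption about ESPnet's logging, consistent with the values seen). A self-contained sketch:

```python
import torch
from torch.nn.utils import clip_grad_norm_

model = torch.nn.Linear(8, 8)                 # stand-in model for illustration
loss = model(torch.randn(4, 8)).pow(2).sum()
loss.backward()

# clip_grad_norm_ rescales gradients in place to max_norm and returns the
# total norm *before* clipping -- plausibly the grad_norm reported here.
total_norm = clip_grad_norm_(model.parameters(), max_norm=100.0)
print(float(total_norm))
```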
+[gpua014:0/64] 2023-07-03 09:10:37,177 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 09:10:41,353 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 09:10:41,353 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpua014:0/64] 2023-07-03 09:10:41,460 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 09:17:33,350 (trainer:732) INFO: 7epoch:train:6001-6100batch: iter_time=2.515, forward_time=0.148, loss_ctc=86.303, loss_att=76.234, acc=0.636, loss=79.255, backward_time=0.767, grad_norm=94.935, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.115, optim0_lr0=1.493e-04, train_time=9.165 +[gpua014:0/64] 2023-07-03 09:19:20,370 (trainer:732) INFO: 7epoch:train:6101-6200batch: iter_time=9.976e-05, forward_time=0.107, loss_ctc=75.093, loss_att=61.126, acc=0.658, loss=65.316, backward_time=0.770, grad_norm=76.421, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.492e-04, train_time=2.142 +[gpua014:0/64] 2023-07-03 09:21:06,445 (trainer:732) INFO: 7epoch:train:6201-6300batch: iter_time=1.019e-04, forward_time=0.107, loss_ctc=92.263, loss_att=64.445, acc=0.670, loss=72.790, backward_time=0.765, grad_norm=114.005, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.491e-04, train_time=2.121 +[gpua014:0/64] 2023-07-03 09:22:46,507 (trainer:732) INFO: 7epoch:train:6301-6400batch: iter_time=1.028e-04, forward_time=0.107, loss_ctc=68.110, loss_att=55.528, acc=0.654, loss=59.302, backward_time=0.750, grad_norm=71.688, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.489e-04, train_time=2.001 +[gpua014:0/64] 2023-07-03 09:24:26,341 (trainer:732) INFO: 7epoch:train:6401-6500batch: iter_time=1.034e-04, forward_time=0.107, loss_ctc=69.962, loss_att=59.255, acc=0.651, loss=62.467, backward_time=0.752, grad_norm=79.622, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.488e-04, train_time=1.996 +[gpua014:0/64] 2023-07-03 09:26:11,745 (trainer:732) INFO: 7epoch:train:6501-6600batch: iter_time=1.045e-04, forward_time=0.107, loss_ctc=89.595, loss_att=68.689, acc=0.632, loss=74.960, backward_time=0.762, grad_norm=92.404, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.487e-04, train_time=2.108 +[gpua014:0/64] 2023-07-03 09:27:57,943 (trainer:732) INFO: 7epoch:train:6601-6700batch: iter_time=1.058e-04, forward_time=0.107, loss_ctc=79.995, loss_att=63.315, acc=0.659, loss=68.319, backward_time=0.758, grad_norm=81.903, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.485e-04, train_time=2.124 +[gpua014:0/64] 2023-07-03 09:30:02,407 (trainer:732) INFO: 7epoch:train:6701-6800batch: iter_time=1.086e-04, forward_time=0.107, loss_ctc=72.661, loss_att=56.214, acc=0.665, loss=61.148, backward_time=0.790, grad_norm=73.510, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.484e-04, 
train_time=2.489 +[gpua014:0/64] 2023-07-03 09:31:55,436 (trainer:732) INFO: 7epoch:train:6801-6900batch: iter_time=1.070e-04, forward_time=0.106, loss_ctc=75.846, loss_att=58.150, acc=0.646, loss=63.459, backward_time=0.790, grad_norm=83.791, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.483e-04, train_time=2.260 +[gpua014:0/64] 2023-07-03 09:33:39,246 (trainer:732) INFO: 7epoch:train:6901-7000batch: iter_time=1.033e-04, forward_time=0.106, loss_ctc=93.412, loss_att=69.559, acc=0.643, loss=76.715, backward_time=0.755, grad_norm=124.091, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.481e-04, train_time=2.076 +[gpua014:0/64] 2023-07-03 09:33:41,064 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua014:0/64] 2023-07-03 09:34:03,574 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 09:34:07,898 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 09:34:07,898 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpua014:0/64] 2023-07-03 09:34:07,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 09:39:53,899 (trainer:732) INFO: 7epoch:train:7001-7100batch: iter_time=1.661, forward_time=0.165, loss_ctc=85.745, loss_att=74.852, acc=0.638, loss=78.120, backward_time=0.796, grad_norm=89.367, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.116, optim0_lr0=1.480e-04, train_time=7.493 +[gpua014:0/64] 2023-07-03 09:41:34,577 (trainer:732) INFO: 7epoch:train:7101-7200batch: iter_time=9.505e-05, forward_time=0.106, loss_ctc=75.545, loss_att=61.370, acc=0.658, loss=65.622, backward_time=0.754, grad_norm=83.909, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.479e-04, train_time=2.014 +[gpua014:0/64] 2023-07-03 09:43:21,052 (trainer:732) INFO: 7epoch:train:7201-7300batch: iter_time=9.087e-05, forward_time=0.107, loss_ctc=91.013, loss_att=63.631, acc=0.674, loss=71.845, backward_time=0.757, grad_norm=92.755, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.478e-04, train_time=2.129 +[gpua014:0/64] 2023-07-03 09:45:21,916 (trainer:732) INFO: 7epoch:train:7301-7400batch: iter_time=9.542e-05, forward_time=0.106, loss_ctc=66.154, loss_att=54.867, acc=0.655, loss=58.253, backward_time=0.800, grad_norm=71.200, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.476e-04, train_time=2.417 +[gpua014:0/64] 2023-07-03 09:47:10,246 (trainer:732) INFO: 7epoch:train:7401-7500batch: iter_time=9.928e-05, forward_time=0.106, loss_ctc=70.077, loss_att=58.476, acc=0.654, loss=61.956, backward_time=0.760, grad_norm=95.786, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.475e-04, train_time=2.166 +[gpua014:0/64] 2023-07-03 09:48:59,141 (trainer:732) INFO: 7epoch:train:7501-7600batch: iter_time=9.230e-05, forward_time=0.106, loss_ctc=87.055, loss_att=68.038, acc=0.636, loss=73.743, 
backward_time=0.777, grad_norm=89.669, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.474e-04, train_time=2.178 +[gpua014:0/64] 2023-07-03 09:51:08,737 (trainer:732) INFO: 7epoch:train:7601-7700batch: iter_time=9.734e-05, forward_time=0.107, loss_ctc=80.239, loss_att=62.993, acc=0.660, loss=68.167, backward_time=0.804, grad_norm=88.549, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.472e-04, train_time=2.592 +[gpua014:0/64] 2023-07-03 09:53:15,794 (trainer:732) INFO: 7epoch:train:7701-7800batch: iter_time=9.459e-05, forward_time=0.106, loss_ctc=71.141, loss_att=55.078, acc=0.670, loss=59.897, backward_time=0.808, grad_norm=70.481, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.471e-04, train_time=2.541 +[gpua014:0/64] 2023-07-03 09:55:31,351 (trainer:732) INFO: 7epoch:train:7801-7900batch: iter_time=9.697e-05, forward_time=0.106, loss_ctc=75.406, loss_att=58.833, acc=0.648, loss=63.805, backward_time=0.798, grad_norm=87.772, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.470e-04, train_time=2.711 +[gpua014:0/64] 2023-07-03 09:57:28,632 (trainer:732) INFO: 7epoch:train:7901-8000batch: iter_time=8.980e-05, forward_time=0.106, loss_ctc=91.543, loss_att=68.383, acc=0.647, loss=75.331, backward_time=0.779, grad_norm=86.404, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.469e-04, train_time=2.345 +[gpua014:0/64] 2023-07-03 09:57:47,691 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua014:0/64] 2023-07-03 09:58:09,847 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 09:58:14,125 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 09:58:14,125 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpua014:0/64] 2023-07-03 09:58:14,262 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 10:03:58,466 (trainer:732) INFO: 7epoch:train:8001-8100batch: iter_time=2.282, forward_time=0.154, loss_ctc=83.365, loss_att=75.432, acc=0.644, loss=77.812, backward_time=0.766, grad_norm=88.530, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.116, optim0_lr0=1.467e-04, train_time=7.796 +[gpua014:0/64] 2023-07-03 10:05:43,265 (trainer:732) INFO: 7epoch:train:8101-8200batch: iter_time=8.830e-05, forward_time=0.107, loss_ctc=74.584, loss_att=62.289, acc=0.666, loss=65.978, backward_time=0.763, grad_norm=79.943, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.466e-04, train_time=2.096 +[gpua014:0/64] 2023-07-03 10:07:29,895 (trainer:732) INFO: 7epoch:train:8201-8300batch: iter_time=8.939e-05, forward_time=0.108, loss_ctc=89.677, loss_att=62.797, acc=0.680, loss=70.861, backward_time=0.761, grad_norm=94.370, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.465e-04, train_time=2.132 +[gpua014:0/64] 2023-07-03 10:09:09,615 (trainer:732) INFO: 
7epoch:train:8301-8400batch: iter_time=8.848e-05, forward_time=0.107, loss_ctc=65.961, loss_att=54.393, acc=0.663, loss=57.863, backward_time=0.750, grad_norm=73.361, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.464e-04, train_time=1.994 +[gpua014:0/64] 2023-07-03 10:10:55,553 (trainer:732) INFO: 7epoch:train:8401-8500batch: iter_time=9.190e-05, forward_time=0.108, loss_ctc=71.379, loss_att=58.830, acc=0.665, loss=62.595, backward_time=0.760, grad_norm=93.320, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.462e-04, train_time=2.119 +[gpua014:0/64] 2023-07-03 10:12:42,291 (trainer:732) INFO: 7epoch:train:8501-8600batch: iter_time=8.775e-05, forward_time=0.107, loss_ctc=86.728, loss_att=68.520, acc=0.644, loss=73.982, backward_time=0.764, grad_norm=91.445, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.461e-04, train_time=2.135 +[gpua014:0/64] 2023-07-03 10:14:31,632 (trainer:732) INFO: 7epoch:train:8601-8700batch: iter_time=8.929e-05, forward_time=0.108, loss_ctc=80.640, loss_att=63.366, acc=0.669, loss=68.548, backward_time=0.762, grad_norm=88.426, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.460e-04, train_time=2.187 +[gpua014:0/64] 2023-07-03 10:16:59,480 (trainer:732) INFO: 7epoch:train:8701-8800batch: iter_time=8.639e-05, forward_time=0.107, loss_ctc=71.500, loss_att=54.790, acc=0.676, loss=59.803, backward_time=0.872, grad_norm=79.042, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.459e-04, train_time=2.957 +[gpua014:0/64] 2023-07-03 10:18:46,729 (trainer:732) INFO: 7epoch:train:8801-8900batch: iter_time=8.369e-05, forward_time=0.107, loss_ctc=76.836, loss_att=60.705, acc=0.649, loss=65.544, backward_time=0.773, grad_norm=84.442, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.457e-04, train_time=2.145 +[gpua014:0/64] 2023-07-03 10:20:29,757 (trainer:732) INFO: 7epoch:train:8901-9000batch: iter_time=8.318e-05, forward_time=0.107, loss_ctc=91.639, loss_att=67.676, acc=0.659, loss=74.865, backward_time=0.753, grad_norm=85.993, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.456e-04, train_time=2.060 +[gpua014:0/64] 2023-07-03 10:20:32,022 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
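[Editor's note] The trainer:732 progress lines are regular enough to mine for plots. A small, self-contained parser; the field order follows the log format, and the sample string is the 7epoch:train:9201-9300 block from this log:

```python
import re

# Extract per-100-batch training stats from trainer:732 lines so the loss/acc
# curves buried in this log can be plotted.
PAT = re.compile(
    r"(\d+)epoch:train:(\d+)-(\d+)batch:"
    r".*?loss_ctc=([\d.]+).*?loss_att=([\d.]+)"
    r".*?acc=([\d.]+).*?loss=([\d.]+)"
    r".*?optim0_lr0=([\deE.+-]+)"
)

def parse_progress(line):
    m = PAT.search(line)
    if m is None:
        return None
    epoch, lo, hi, ctc, att, acc, loss, lr = m.groups()
    return dict(epoch=int(epoch), batches=(int(lo), int(hi)),
                loss_ctc=float(ctc), loss_att=float(att),
                acc=float(acc), loss=float(loss), lr=float(lr))

sample = ("7epoch:train:9201-9300batch: iter_time=1.398e-04, forward_time=0.108, "
          "loss_ctc=90.046, loss_att=61.760, acc=0.686, loss=70.246, "
          "backward_time=0.757, grad_norm=100.195, clip=100.000, "
          "optim_step_time=0.113, optim0_lr0=1.452e-04, train_time=2.091")
print(parse_progress(sample))
```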
+[gpua014:0/64] 2023-07-03 10:20:54,558 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 10:20:58,838 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 10:20:58,839 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpua014:0/64] 2023-07-03 10:20:58,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 10:27:29,156 (trainer:732) INFO: 7epoch:train:9001-9100batch: iter_time=1.703, forward_time=0.178, loss_ctc=85.505, loss_att=72.798, acc=0.653, loss=76.610, backward_time=0.772, grad_norm=104.227, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.118, optim0_lr0=1.455e-04, train_time=8.387 +[gpua014:0/64] 2023-07-03 10:29:09,609 (trainer:732) INFO: 7epoch:train:9101-9200batch: iter_time=1.373e-04, forward_time=0.109, loss_ctc=74.188, loss_att=61.890, acc=0.667, loss=65.580, backward_time=0.754, grad_norm=96.431, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.454e-04, train_time=2.009 +[gpua014:0/64] 2023-07-03 10:30:54,185 (trainer:732) INFO: 7epoch:train:9201-9300batch: iter_time=1.398e-04, forward_time=0.108, loss_ctc=90.046, loss_att=61.760, acc=0.686, loss=70.246, backward_time=0.757, grad_norm=100.195, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.452e-04, train_time=2.091 +[gpua014:0/64] 2023-07-03 10:32:34,258 (trainer:732) INFO: 7epoch:train:9301-9400batch: iter_time=1.367e-04, forward_time=0.109, loss_ctc=64.672, loss_att=52.652, acc=0.671, loss=56.258, backward_time=0.752, grad_norm=84.092, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.451e-04, train_time=2.001 +[gpua014:0/64] 2023-07-03 10:34:14,068 (trainer:732) INFO: 7epoch:train:9401-9500batch: iter_time=1.388e-04, forward_time=0.109, loss_ctc=68.505, loss_att=59.046, acc=0.667, loss=61.884, backward_time=0.750, grad_norm=76.212, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.450e-04, train_time=1.996 +[gpua014:0/64] 2023-07-03 10:36:15,496 (trainer:732) INFO: 7epoch:train:9501-9600batch: iter_time=1.410e-04, forward_time=0.108, loss_ctc=86.461, loss_att=67.255, acc=0.648, loss=73.017, backward_time=0.800, grad_norm=88.972, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.449e-04, train_time=2.428 +[gpua014:0/64] 2023-07-03 10:38:26,787 (trainer:732) INFO: 7epoch:train:9601-9700batch: iter_time=1.310e-04, forward_time=0.110, loss_ctc=80.413, loss_att=63.005, acc=0.669, loss=68.227, backward_time=0.791, grad_norm=82.114, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.448e-04, train_time=2.626 +[gpua014:0/64] 2023-07-03 10:40:30,272 (trainer:732) INFO: 7epoch:train:9701-9800batch: iter_time=1.379e-04, forward_time=0.186, loss_ctc=74.679, loss_att=55.975, acc=0.675, loss=61.586, backward_time=0.792, grad_norm=75.191, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.117, optim0_lr0=1.446e-04, 
train_time=2.469 +[gpua014:0/64] 2023-07-03 10:42:43,755 (trainer:732) INFO: 7epoch:train:9801-9900batch: iter_time=1.366e-04, forward_time=0.124, loss_ctc=74.408, loss_att=58.232, acc=0.656, loss=63.085, backward_time=0.827, grad_norm=75.295, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.114, optim0_lr0=1.445e-04, train_time=2.669 +[gpua014:0/64] 2023-07-03 10:44:38,433 (trainer:732) INFO: 7epoch:train:9901-10000batch: iter_time=1.286e-04, forward_time=0.108, loss_ctc=90.445, loss_att=66.863, acc=0.659, loss=73.938, backward_time=0.778, grad_norm=87.658, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.444e-04, train_time=2.293 +[gpua014:0/64] 2023-07-03 10:57:01,879 (trainer:338) INFO: 7epoch results: [train] iter_time=0.255, forward_time=0.115, loss_ctc=81.029, loss_att=63.896, acc=0.652, loss=69.036, backward_time=0.778, grad_norm=89.792, clip=100.000, loss_scale=6.979e+08, optim_step_time=0.113, optim0_lr0=1.509e-04, train_time=3.021, time=4 hours, 12 minutes and 8.29 seconds, total_count=40000, gpu_max_cached_mem_GB=37.479, [valid] loss_ctc=61.220, cer_ctc=0.331, loss_att=50.294, acc=0.595, cer=0.461, wer=0.999, loss=53.572, time=6 minutes and 6.24 seconds, total_count=4554, gpu_max_cached_mem_GB=37.479, [att_plot] time=5 minutes and 53.25 seconds, total_count=0, gpu_max_cached_mem_GB=37.479 +[gpua014:0/64] 2023-07-03 10:57:17,180 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua014:0/64] 2023-07-03 10:57:17,199 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/2epoch.pth +[gpua014:0/64] 2023-07-03 10:57:17,199 (trainer:272) INFO: 8/100epoch started. Estimated time to finish: 2 weeks, 2 days and 12 hours +[gpua014:0/64] 2023-07-03 10:57:17,202 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
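[Editor's note] The finish estimate can be sanity-checked from the epoch summary above: epoch 7 took 4 h 12 m of training plus ~6 m of validation and ~6 m of attention plotting, and 93 of 100 epochs remain.

```python
# Epoch 7 wall time from the summary above: train + valid + att_plot, in hours.
epoch_h = (4 + 12/60 + 8.29/3600) + (6 + 6.24/60) / 60 + (5 + 53.25/60) / 60
print(f"{epoch_h:.2f} h/epoch -> {93 * epoch_h / 24:.1f} days left")
# ~4.40 h/epoch -> ~17.1 days, the same ballpark as the logged estimate of
# "2 weeks, 2 days and 12 hours" (16.5 days), which presumably averages
# over all completed epochs rather than just the slower epoch 7.
```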
+[gpua014:0/64] 2023-07-03 10:57:39,112 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 10:57:43,426 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 10:57:43,426 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpua014:0/64] 2023-07-03 10:57:43,434 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 11:05:34,810 (trainer:732) INFO: 8epoch:train:1-100batch: iter_time=3.863, forward_time=0.182, loss_ctc=71.953, loss_att=56.579, acc=0.652, loss=61.191, backward_time=0.767, grad_norm=81.674, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.116, optim0_lr0=1.443e-04, train_time=9.952 +[gpua014:0/64] 2023-07-03 11:07:15,061 (trainer:732) INFO: 8epoch:train:101-200batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=83.688, loss_att=62.700, acc=0.660, loss=68.996, backward_time=0.751, grad_norm=83.521, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.442e-04, train_time=2.005 +[gpua014:0/64] 2023-07-03 11:08:58,814 (trainer:732) INFO: 8epoch:train:201-300batch: iter_time=1.029e-04, forward_time=0.107, loss_ctc=77.702, loss_att=60.845, acc=0.642, loss=65.902, backward_time=0.769, grad_norm=72.915, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.440e-04, train_time=2.075 +[gpua014:0/64] 2023-07-03 11:10:55,797 (trainer:732) INFO: 8epoch:train:301-400batch: iter_time=1.058e-04, forward_time=0.107, loss_ctc=86.552, loss_att=68.588, acc=0.662, loss=73.977, backward_time=0.778, grad_norm=84.683, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.439e-04, train_time=2.339 +[gpua014:0/64] 2023-07-03 11:12:42,225 (trainer:732) INFO: 8epoch:train:401-500batch: iter_time=1.129e-04, forward_time=0.107, loss_ctc=89.365, loss_att=76.265, acc=0.638, loss=80.195, backward_time=0.779, grad_norm=103.391, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.438e-04, train_time=2.128 +[gpua014:0/64] 2023-07-03 11:14:54,649 (trainer:732) INFO: 8epoch:train:501-600batch: iter_time=1.316e-04, forward_time=0.107, loss_ctc=92.306, loss_att=69.410, acc=0.647, loss=76.279, backward_time=0.812, grad_norm=85.465, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.437e-04, train_time=2.648 +[gpua014:0/64] 2023-07-03 11:16:44,481 (trainer:732) INFO: 8epoch:train:601-700batch: iter_time=1.425e-04, forward_time=0.107, loss_ctc=81.930, loss_att=62.838, acc=0.653, loss=68.566, backward_time=0.777, grad_norm=113.254, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.436e-04, train_time=2.196 +[gpua014:0/64] 2023-07-03 11:18:37,508 (trainer:732) INFO: 8epoch:train:701-800batch: iter_time=1.136e-04, forward_time=0.107, loss_ctc=88.066, loss_att=64.322, acc=0.646, loss=71.445, backward_time=0.779, grad_norm=92.199, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.434e-04, train_time=2.260 
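[Editor's note] After each epoch the trainer refreshes the best-model pointers (valid.acc, valid.total_count) and deletes a superseded snapshot (1epoch.pth after epoch 6, 2epoch.pth after epoch 7 above), keeping a bounded window of .pth files. A loose, illustrative sketch of that pruning; the function and keep_last parameter are hypothetical, not ESPnet internals:

```python
from pathlib import Path

def prune_epoch_checkpoints(exp_dir, keep_last=5):
    """Illustrative only: drop all but the newest `keep_last` epoch snapshots.

    ESPnet's real policy also protects whatever the best-model links point at;
    this sketch just mirrors the visible effect (old Nepoch.pth files vanish).
    """
    snaps = sorted(Path(exp_dir).glob("*epoch.pth"),
                   key=lambda p: int(p.stem.removesuffix("epoch")))
    for old in snaps[:-keep_last]:
        old.unlink()
```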
+[gpua014:0/64] 2023-07-03 11:20:24,726 (trainer:732) INFO: 8epoch:train:801-900batch: iter_time=8.943e-05, forward_time=0.107, loss_ctc=93.568, loss_att=68.082, acc=0.659, loss=75.728, backward_time=0.769, grad_norm=140.705, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.433e-04, train_time=2.144 +[gpua014:0/64] 2023-07-03 11:22:22,840 (trainer:732) INFO: 8epoch:train:901-1000batch: iter_time=8.851e-05, forward_time=0.105, loss_ctc=70.937, loss_att=57.486, acc=0.648, loss=61.522, backward_time=0.786, grad_norm=72.406, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.432e-04, train_time=2.362 +[gpua014:0/64] 2023-07-03 11:22:34,179 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua014:0/64] 2023-07-03 11:22:55,942 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 11:23:00,214 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 11:23:00,214 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpua014:0/64] 2023-07-03 11:23:00,230 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 11:30:44,366 (trainer:732) INFO: 8epoch:train:1001-1100batch: iter_time=1.738, forward_time=0.177, loss_ctc=71.293, loss_att=55.648, acc=0.657, loss=60.341, backward_time=0.841, grad_norm=81.246, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.116, optim0_lr0=1.431e-04, train_time=10.029 +[gpua014:0/64] 2023-07-03 11:32:40,908 (trainer:732) INFO: 8epoch:train:1101-1200batch: iter_time=9.558e-05, forward_time=0.106, loss_ctc=83.943, loss_att=63.374, acc=0.659, loss=69.545, backward_time=0.790, grad_norm=79.844, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.430e-04, train_time=2.332 +[gpua014:0/64] 2023-07-03 11:34:35,694 (trainer:732) INFO: 8epoch:train:1201-1300batch: iter_time=8.459e-05, forward_time=0.107, loss_ctc=76.391, loss_att=59.918, acc=0.645, loss=64.860, backward_time=0.781, grad_norm=73.294, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.429e-04, train_time=2.296 +[gpua014:0/64] 2023-07-03 11:36:35,790 (trainer:732) INFO: 8epoch:train:1301-1400batch: iter_time=8.933e-05, forward_time=0.106, loss_ctc=85.561, loss_att=66.344, acc=0.667, loss=72.109, backward_time=0.802, grad_norm=87.421, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.427e-04, train_time=2.402 +[gpua014:0/64] 2023-07-03 11:38:49,290 (trainer:732) INFO: 8epoch:train:1401-1500batch: iter_time=9.077e-05, forward_time=0.107, loss_ctc=86.480, loss_att=76.600, acc=0.641, loss=79.564, backward_time=0.825, grad_norm=92.828, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.426e-04, train_time=2.670 +[gpua014:0/64] 2023-07-03 11:40:48,063 (trainer:732) INFO: 8epoch:train:1501-1600batch: iter_time=9.566e-05, forward_time=0.107, loss_ctc=89.979, loss_att=67.303, acc=0.653, loss=74.105, backward_time=0.798, 
grad_norm=91.494, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.425e-04, train_time=2.375 +[gpua014:0/64] 2023-07-03 11:42:50,035 (trainer:732) INFO: 8epoch:train:1601-1700batch: iter_time=9.295e-05, forward_time=0.107, loss_ctc=80.658, loss_att=61.124, acc=0.663, loss=66.984, backward_time=0.812, grad_norm=85.053, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.424e-04, train_time=2.439 +[gpua014:0/64] 2023-07-03 11:44:46,238 (trainer:732) INFO: 8epoch:train:1701-1800batch: iter_time=1.007e-04, forward_time=0.106, loss_ctc=86.900, loss_att=63.733, acc=0.650, loss=70.683, backward_time=0.787, grad_norm=98.100, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.423e-04, train_time=2.324 +[gpua014:0/64] 2023-07-03 11:47:06,824 (trainer:732) INFO: 8epoch:train:1801-1900batch: iter_time=1.073e-04, forward_time=0.107, loss_ctc=92.292, loss_att=68.604, acc=0.657, loss=75.710, backward_time=0.836, grad_norm=99.245, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.422e-04, train_time=2.811 +[gpua014:0/64] 2023-07-03 11:49:44,334 (trainer:732) INFO: 8epoch:train:1901-2000batch: iter_time=1.030e-04, forward_time=0.107, loss_ctc=71.578, loss_att=57.196, acc=0.649, loss=61.510, backward_time=0.863, grad_norm=79.403, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.420e-04, train_time=3.150 +[gpua014:0/64] 2023-07-03 11:50:04,363 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua014:0/64] 2023-07-03 11:50:26,890 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 11:50:31,243 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 11:50:31,244 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpua014:0/64] 2023-07-03 11:50:31,292 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 11:59:59,939 (trainer:732) INFO: 8epoch:train:2001-2100batch: iter_time=2.825, forward_time=0.200, loss_ctc=70.365, loss_att=54.572, acc=0.663, loss=59.310, backward_time=1.038, grad_norm=86.690, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.118, optim0_lr0=1.419e-04, train_time=12.312 +[gpua014:0/64] 2023-07-03 12:02:53,285 (trainer:732) INFO: 8epoch:train:2101-2200batch: iter_time=1.095e-04, forward_time=0.108, loss_ctc=83.391, loss_att=61.753, acc=0.666, loss=68.245, backward_time=0.883, grad_norm=77.824, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.418e-04, train_time=3.467 +[gpua014:0/64] 2023-07-03 12:05:41,833 (trainer:732) INFO: 8epoch:train:2201-2300batch: iter_time=1.132e-04, forward_time=0.108, loss_ctc=77.306, loss_att=60.610, acc=0.645, loss=65.619, backward_time=0.933, grad_norm=81.733, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.417e-04, train_time=3.371 +[gpua014:0/64] 2023-07-03 12:08:26,398 (trainer:732) INFO: 8epoch:train:2301-2400batch: 
iter_time=1.175e-04, forward_time=0.109, loss_ctc=83.620, loss_att=65.137, acc=0.672, loss=70.682, backward_time=0.857, grad_norm=94.295, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.416e-04, train_time=3.291 +[gpua014:0/64] 2023-07-03 12:10:59,777 (trainer:732) INFO: 8epoch:train:2401-2500batch: iter_time=1.165e-04, forward_time=0.109, loss_ctc=85.837, loss_att=73.558, acc=0.649, loss=77.242, backward_time=0.813, grad_norm=88.674, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.415e-04, train_time=3.067 +[gpua014:0/64] 2023-07-03 12:14:24,808 (trainer:732) INFO: 8epoch:train:2501-2600batch: iter_time=1.174e-04, forward_time=0.108, loss_ctc=91.106, loss_att=68.368, acc=0.650, loss=75.189, backward_time=0.981, grad_norm=90.440, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.414e-04, train_time=4.100 +[gpua014:0/64] 2023-07-03 12:17:14,627 (trainer:732) INFO: 8epoch:train:2601-2700batch: iter_time=1.268e-04, forward_time=0.108, loss_ctc=79.435, loss_att=61.102, acc=0.664, loss=66.602, backward_time=0.862, grad_norm=90.710, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.412e-04, train_time=3.396 +[gpua014:0/64] 2023-07-03 12:19:50,904 (trainer:732) INFO: 8epoch:train:2701-2800batch: iter_time=1.226e-04, forward_time=0.108, loss_ctc=85.706, loss_att=61.989, acc=0.655, loss=69.104, backward_time=0.803, grad_norm=90.672, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.411e-04, train_time=3.125 +[gpua014:0/64] 2023-07-03 12:22:45,720 (trainer:732) INFO: 8epoch:train:2801-2900batch: iter_time=1.332e-04, forward_time=0.108, loss_ctc=89.745, loss_att=67.272, acc=0.664, loss=74.014, backward_time=1.049, grad_norm=96.450, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.410e-04, train_time=3.496 +[gpua014:0/64] 2023-07-03 12:25:04,044 (trainer:732) INFO: 8epoch:train:2901-3000batch: iter_time=1.130e-04, forward_time=0.107, loss_ctc=72.146, loss_att=56.681, acc=0.656, loss=61.320, backward_time=0.809, grad_norm=81.054, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.409e-04, train_time=2.766 +[gpua014:0/64] 2023-07-03 12:25:21,806 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
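[Editor's note] The batch-sampler summaries repeated around each iter-factory build give a handle on corpus size: every split reports N-batch=45593 at batch_size=128 (mean 128.0, max 129), so under the assumption of ten equal splits:

```python
# Back-of-envelope corpus size from the UnsortedBatchSampler summaries above.
n_batches, batch_size, n_splits = 45593, 128, 10
per_split = n_batches * batch_size
print(f"{per_split:,} utterances/split -> ~{per_split * n_splits / 1e6:.0f}M total")
# -> 5,835,904 utterances/split -> ~58M total (a slight undercount, given the
#    occasional 129-sample batch; equal split sizes assumed, as the logs suggest).
```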
+[gpua014:0/64] 2023-07-03 12:25:43,932 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 12:25:48,257 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 12:25:48,257 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpua014:0/64] 2023-07-03 12:25:48,265 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 12:32:34,384 (trainer:732) INFO: 8epoch:train:3001-3100batch: iter_time=2.673, forward_time=0.160, loss_ctc=70.145, loss_att=55.103, acc=0.667, loss=59.616, backward_time=0.826, grad_norm=76.517, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.115, optim0_lr0=1.408e-04, train_time=9.006 +[gpua014:0/64] 2023-07-03 12:34:29,875 (trainer:732) INFO: 8epoch:train:3101-3200batch: iter_time=1.030e-04, forward_time=0.107, loss_ctc=83.797, loss_att=61.450, acc=0.667, loss=68.154, backward_time=0.777, grad_norm=90.037, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.407e-04, train_time=2.310 +[gpua014:0/64] 2023-07-03 12:36:26,930 (trainer:732) INFO: 8epoch:train:3201-3300batch: iter_time=1.020e-04, forward_time=0.107, loss_ctc=76.116, loss_att=59.328, acc=0.654, loss=64.365, backward_time=0.772, grad_norm=66.043, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.406e-04, train_time=2.341 +[gpua014:0/64] 2023-07-03 12:38:25,290 (trainer:732) INFO: 8epoch:train:3301-3400batch: iter_time=1.018e-04, forward_time=0.107, loss_ctc=84.464, loss_att=66.292, acc=0.673, loss=71.743, backward_time=0.773, grad_norm=78.714, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.405e-04, train_time=2.367 +[gpua014:0/64] 2023-07-03 12:40:38,648 (trainer:732) INFO: 8epoch:train:3401-3500batch: iter_time=9.895e-05, forward_time=0.107, loss_ctc=84.560, loss_att=73.887, acc=0.649, loss=77.089, backward_time=0.834, grad_norm=113.670, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.404e-04, train_time=2.667 +[gpua014:0/64] 2023-07-03 12:43:02,556 (trainer:732) INFO: 8epoch:train:3501-3600batch: iter_time=1.014e-04, forward_time=0.107, loss_ctc=89.255, loss_att=67.307, acc=0.654, loss=73.892, backward_time=0.852, grad_norm=93.040, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.402e-04, train_time=2.878 +[gpua014:0/64] 2023-07-03 12:45:13,037 (trainer:732) INFO: 8epoch:train:3601-3700batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=78.034, loss_att=59.867, acc=0.668, loss=65.317, backward_time=0.803, grad_norm=85.093, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.401e-04, train_time=2.609 +[gpua014:0/64] 2023-07-03 12:47:27,416 (trainer:732) INFO: 8epoch:train:3701-3800batch: iter_time=1.049e-04, forward_time=0.106, loss_ctc=86.547, loss_att=62.217, acc=0.654, loss=69.516, backward_time=0.795, grad_norm=88.246, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.400e-04, 
train_time=2.687 +[gpua014:0/64] 2023-07-03 12:50:02,668 (trainer:732) INFO: 8epoch:train:3801-3900batch: iter_time=1.054e-04, forward_time=0.107, loss_ctc=88.651, loss_att=65.454, acc=0.666, loss=72.413, backward_time=0.862, grad_norm=97.799, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.399e-04, train_time=3.105 +[gpua014:0/64] 2023-07-03 12:52:27,817 (trainer:732) INFO: 8epoch:train:3901-4000batch: iter_time=9.448e-05, forward_time=0.106, loss_ctc=71.610, loss_att=57.257, acc=0.655, loss=61.563, backward_time=0.826, grad_norm=77.210, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.398e-04, train_time=2.903 +[gpua014:0/64] 2023-07-03 12:52:47,846 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua014:0/64] 2023-07-03 12:53:10,281 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 12:53:14,749 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 12:53:14,749 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpua014:0/64] 2023-07-03 12:53:14,756 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 12:59:57,708 (trainer:732) INFO: 8epoch:train:4001-4100batch: iter_time=2.331, forward_time=0.155, loss_ctc=69.145, loss_att=54.149, acc=0.667, loss=58.648, backward_time=0.796, grad_norm=74.360, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.124, optim0_lr0=1.397e-04, train_time=8.997 +[gpua014:0/64] 2023-07-03 13:01:39,587 (trainer:732) INFO: 8epoch:train:4101-4200batch: iter_time=1.109e-04, forward_time=0.108, loss_ctc=81.363, loss_att=60.356, acc=0.676, loss=66.658, backward_time=0.754, grad_norm=76.531, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.396e-04, train_time=2.038 +[gpua014:0/64] 2023-07-03 13:03:28,592 (trainer:732) INFO: 8epoch:train:4201-4300batch: iter_time=9.674e-05, forward_time=0.107, loss_ctc=76.566, loss_att=59.580, acc=0.652, loss=64.676, backward_time=0.768, grad_norm=72.014, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.395e-04, train_time=2.180 +[gpua014:0/64] 2023-07-03 13:05:21,031 (trainer:732) INFO: 8epoch:train:4301-4400batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=84.096, loss_att=64.758, acc=0.676, loss=70.560, backward_time=0.778, grad_norm=87.357, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.394e-04, train_time=2.249 +[gpua014:0/64] 2023-07-03 13:07:31,694 (trainer:732) INFO: 8epoch:train:4401-4500batch: iter_time=1.007e-04, forward_time=0.106, loss_ctc=86.125, loss_att=74.133, acc=0.649, loss=77.730, backward_time=0.816, grad_norm=107.166, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.393e-04, train_time=2.613 +[gpua014:0/64] 2023-07-03 13:09:27,083 (trainer:732) INFO: 8epoch:train:4501-4600batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=89.580, loss_att=66.935, acc=0.653, loss=73.728, 
backward_time=0.774, grad_norm=107.438, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.392e-04, train_time=2.308 +[gpua014:0/64] 2023-07-03 13:11:25,430 (trainer:732) INFO: 8epoch:train:4601-4700batch: iter_time=9.916e-05, forward_time=0.107, loss_ctc=78.839, loss_att=59.185, acc=0.673, loss=65.081, backward_time=0.782, grad_norm=81.277, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.390e-04, train_time=2.367 +[gpua014:0/64] 2023-07-03 13:13:46,101 (trainer:732) INFO: 8epoch:train:4701-4800batch: iter_time=1.108e-04, forward_time=0.107, loss_ctc=84.629, loss_att=62.561, acc=0.654, loss=69.181, backward_time=0.817, grad_norm=88.020, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.389e-04, train_time=2.813 +[gpua014:0/64] 2023-07-03 13:15:46,588 (trainer:732) INFO: 8epoch:train:4801-4900batch: iter_time=9.762e-05, forward_time=0.106, loss_ctc=90.902, loss_att=66.494, acc=0.666, loss=73.816, backward_time=0.784, grad_norm=109.251, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.388e-04, train_time=2.410 +[gpua014:0/64] 2023-07-03 13:17:48,653 (trainer:732) INFO: 8epoch:train:4901-5000batch: iter_time=9.738e-05, forward_time=0.106, loss_ctc=71.522, loss_att=56.646, acc=0.657, loss=61.109, backward_time=0.805, grad_norm=74.553, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.387e-04, train_time=2.441 +[gpua014:0/64] 2023-07-03 13:17:51,304 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua014:0/64] 2023-07-03 13:18:13,656 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 13:18:17,899 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 13:18:17,899 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpua014:0/64] 2023-07-03 13:18:17,910 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 13:24:27,929 (trainer:732) INFO: 8epoch:train:5001-5100batch: iter_time=2.555, forward_time=0.149, loss_ctc=68.880, loss_att=54.004, acc=0.665, loss=58.467, backward_time=0.791, grad_norm=75.203, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.117, optim0_lr0=1.386e-04, train_time=7.985 +[gpua014:0/64] 2023-07-03 13:26:26,669 (trainer:732) INFO: 8epoch:train:5101-5200batch: iter_time=9.325e-05, forward_time=0.106, loss_ctc=82.288, loss_att=62.900, acc=0.654, loss=68.717, backward_time=0.788, grad_norm=86.630, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.385e-04, train_time=2.375 +[gpua014:0/64] 2023-07-03 13:28:25,293 (trainer:732) INFO: 8epoch:train:5201-5300batch: iter_time=9.276e-05, forward_time=0.106, loss_ctc=75.183, loss_att=59.997, acc=0.645, loss=64.553, backward_time=0.779, grad_norm=71.010, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.384e-04, train_time=2.372 +[gpua014:0/64] 2023-07-03 13:30:40,953 (trainer:732) INFO: 
8epoch:train:5301-5400batch: iter_time=9.294e-05, forward_time=0.106, loss_ctc=85.428, loss_att=67.675, acc=0.665, loss=73.001, backward_time=0.797, grad_norm=84.101, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.383e-04, train_time=2.713 +[gpua014:0/64] 2023-07-03 13:32:52,185 (trainer:732) INFO: 8epoch:train:5401-5500batch: iter_time=9.385e-05, forward_time=0.106, loss_ctc=84.065, loss_att=75.164, acc=0.635, loss=77.834, backward_time=0.812, grad_norm=103.559, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.382e-04, train_time=2.624 +[gpua014:0/64] 2023-07-03 13:34:44,837 (trainer:732) INFO: 8epoch:train:5501-5600batch: iter_time=9.046e-05, forward_time=0.107, loss_ctc=89.001, loss_att=67.994, acc=0.647, loss=74.296, backward_time=0.771, grad_norm=82.224, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.381e-04, train_time=2.253 +[gpua014:0/64] 2023-07-03 13:36:42,385 (trainer:732) INFO: 8epoch:train:5601-5700batch: iter_time=9.856e-05, forward_time=0.106, loss_ctc=77.241, loss_att=59.045, acc=0.664, loss=64.504, backward_time=0.768, grad_norm=81.509, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.380e-04, train_time=2.351 +[gpua014:0/64] 2023-07-03 13:38:54,042 (trainer:732) INFO: 8epoch:train:5701-5800batch: iter_time=9.087e-05, forward_time=0.106, loss_ctc=84.116, loss_att=63.222, acc=0.644, loss=69.490, backward_time=0.817, grad_norm=97.988, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.379e-04, train_time=2.633 +[gpua014:0/64] 2023-07-03 13:40:49,774 (trainer:732) INFO: 8epoch:train:5801-5900batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=88.392, loss_att=65.440, acc=0.670, loss=72.326, backward_time=0.783, grad_norm=98.106, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.378e-04, train_time=2.314 +[gpua014:0/64] 2023-07-03 13:43:19,996 (trainer:732) INFO: 8epoch:train:5901-6000batch: iter_time=4.370e-04, forward_time=0.121, loss_ctc=69.786, loss_att=55.824, acc=0.658, loss=60.012, backward_time=0.846, grad_norm=74.843, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.377e-04, train_time=3.004 +[gpua014:0/64] 2023-07-03 13:43:40,024 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
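Note the loss_scale column: it holds at 2.147e+09 (about 2**31) through the early records, doubles to 4.295e+09 (2**32) at batch 4001-4100 above, and doubles again to 8.590e+09 (2**33) later in the epoch. Doubling at fixed intervals is the signature of dynamic loss scaling for mixed-precision training: the scale grows after a streak of overflow-free optimizer steps and is backed off when an overflow is detected. The trainer's actual scaler is not shown in this log; the sketch below uses the torch.cuda.amp.GradScaler default constants as assumptions.

# Sketch of dynamic loss-scale bookkeeping. The constants follow the
# torch.cuda.amp.GradScaler defaults; the scaler actually used by this
# trainer is not visible in the log.
class LossScale:
    def __init__(self, init_scale=2.0**16, growth_factor=2.0,
                 backoff_factor=0.5, growth_interval=2000):
        self.scale = init_scale
        self.growth_factor = growth_factor
        self.backoff_factor = backoff_factor
        self.growth_interval = growth_interval
        self._good_steps = 0

    def update(self, found_inf):
        if found_inf:
            # Overflow: shrink the scale and restart the streak.
            self.scale *= self.backoff_factor
            self._good_steps = 0
        else:
            self._good_steps += 1
            if self._good_steps == self.growth_interval:
                # A full streak of finite gradients: double the scale.
                self.scale *= self.growth_factor
                self._good_steps = 0

s = LossScale(init_scale=2.0**31)
for _ in range(2000):
    s.update(found_inf=False)
print(s.scale)  # -> 4294967296.0, the 4.295e+09 seen above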
+[gpua014:0/64] 2023-07-03 13:44:02,407 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 13:44:06,645 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 13:44:06,645 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpua014:0/64] 2023-07-03 13:44:06,653 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 13:51:46,776 (trainer:732) INFO: 8epoch:train:6001-6100batch: iter_time=2.333, forward_time=0.142, loss_ctc=70.714, loss_att=54.097, acc=0.664, loss=59.082, backward_time=0.783, grad_norm=101.489, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.115, optim0_lr0=1.376e-04, train_time=10.135 +[gpua014:0/64] 2023-07-03 13:53:27,717 (trainer:732) INFO: 8epoch:train:6101-6200batch: iter_time=1.108e-04, forward_time=0.108, loss_ctc=82.574, loss_att=61.216, acc=0.660, loss=67.623, backward_time=0.754, grad_norm=77.311, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.375e-04, train_time=2.019 +[gpua014:0/64] 2023-07-03 13:55:07,590 (trainer:732) INFO: 8epoch:train:6201-6300batch: iter_time=1.078e-04, forward_time=0.108, loss_ctc=75.131, loss_att=58.601, acc=0.651, loss=63.560, backward_time=0.752, grad_norm=72.031, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.374e-04, train_time=1.997 +[gpua014:0/64] 2023-07-03 13:56:48,280 (trainer:732) INFO: 8epoch:train:6301-6400batch: iter_time=1.166e-04, forward_time=0.108, loss_ctc=82.936, loss_att=66.241, acc=0.668, loss=71.249, backward_time=0.753, grad_norm=81.953, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.373e-04, train_time=2.014 +[gpua014:0/64] 2023-07-03 13:58:33,889 (trainer:732) INFO: 8epoch:train:6401-6500batch: iter_time=1.188e-04, forward_time=0.107, loss_ctc=82.837, loss_att=73.861, acc=0.644, loss=76.554, backward_time=0.767, grad_norm=88.376, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.372e-04, train_time=2.112 +[gpua014:0/64] 2023-07-03 14:00:45,197 (trainer:732) INFO: 8epoch:train:6501-6600batch: iter_time=1.385e-04, forward_time=0.118, loss_ctc=88.465, loss_att=66.303, acc=0.651, loss=72.951, backward_time=0.813, grad_norm=94.601, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.370e-04, train_time=2.626 +[gpua014:0/64] 2023-07-03 14:02:48,614 (trainer:732) INFO: 8epoch:train:6601-6700batch: iter_time=1.137e-04, forward_time=0.107, loss_ctc=78.364, loss_att=59.506, acc=0.661, loss=65.163, backward_time=0.775, grad_norm=84.035, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.369e-04, train_time=2.468 +[gpua014:0/64] 2023-07-03 14:04:39,248 (trainer:732) INFO: 8epoch:train:6701-6800batch: iter_time=1.123e-04, forward_time=0.107, loss_ctc=86.589, loss_att=62.422, acc=0.648, loss=69.672, backward_time=0.774, grad_norm=104.287, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.368e-04, 
train_time=2.212 +[gpua014:0/64] 2023-07-03 14:06:28,935 (trainer:732) INFO: 8epoch:train:6801-6900batch: iter_time=1.166e-04, forward_time=0.108, loss_ctc=87.795, loss_att=64.863, acc=0.674, loss=71.743, backward_time=0.765, grad_norm=90.585, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.367e-04, train_time=2.194 +[gpua014:0/64] 2023-07-03 14:08:15,899 (trainer:732) INFO: 8epoch:train:6901-7000batch: iter_time=3.409e-04, forward_time=0.115, loss_ctc=69.167, loss_att=55.059, acc=0.659, loss=59.291, backward_time=0.767, grad_norm=74.421, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.366e-04, train_time=2.139 +[gpua014:0/64] 2023-07-03 14:08:34,844 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua014:0/64] 2023-07-03 14:08:57,090 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 14:09:01,412 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 14:09:01,412 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpua014:0/64] 2023-07-03 14:09:01,459 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 14:16:37,830 (trainer:732) INFO: 8epoch:train:7001-7100batch: iter_time=2.183, forward_time=0.156, loss_ctc=69.770, loss_att=53.693, acc=0.665, loss=58.516, backward_time=0.776, grad_norm=78.422, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.114, optim0_lr0=1.365e-04, train_time=10.038 +[gpua014:0/64] 2023-07-03 14:18:22,215 (trainer:732) INFO: 8epoch:train:7101-7200batch: iter_time=8.191e-05, forward_time=0.107, loss_ctc=79.880, loss_att=59.668, acc=0.665, loss=65.731, backward_time=0.763, grad_norm=82.036, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.364e-04, train_time=2.088 +[gpua014:0/64] 2023-07-03 14:20:03,765 (trainer:732) INFO: 8epoch:train:7201-7300batch: iter_time=9.511e-05, forward_time=0.107, loss_ctc=76.077, loss_att=58.461, acc=0.651, loss=63.746, backward_time=0.754, grad_norm=74.942, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.363e-04, train_time=2.031 +[gpua014:0/64] 2023-07-03 14:21:47,371 (trainer:732) INFO: 8epoch:train:7301-7400batch: iter_time=8.692e-05, forward_time=0.107, loss_ctc=83.341, loss_att=65.556, acc=0.666, loss=70.892, backward_time=0.761, grad_norm=90.425, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.362e-04, train_time=2.072 +[gpua014:0/64] 2023-07-03 14:23:40,249 (trainer:732) INFO: 8epoch:train:7401-7500batch: iter_time=1.086e-04, forward_time=0.107, loss_ctc=82.464, loss_att=71.751, acc=0.648, loss=74.965, backward_time=0.760, grad_norm=83.705, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.361e-04, train_time=2.257 +[gpua014:0/64] 2023-07-03 14:25:21,284 (trainer:732) INFO: 8epoch:train:7501-7600batch: iter_time=9.175e-05, forward_time=0.106, loss_ctc=85.298, loss_att=65.408, acc=0.654, loss=71.375, 
backward_time=0.755, grad_norm=87.441, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.360e-04, train_time=2.020 +[gpua014:0/64] 2023-07-03 14:27:14,374 (trainer:732) INFO: 8epoch:train:7601-7700batch: iter_time=8.964e-05, forward_time=0.107, loss_ctc=78.233, loss_att=59.465, acc=0.664, loss=65.095, backward_time=0.792, grad_norm=94.598, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.359e-04, train_time=2.262 +[gpua014:0/64] 2023-07-03 14:29:08,048 (trainer:732) INFO: 8epoch:train:7701-7800batch: iter_time=8.873e-05, forward_time=0.106, loss_ctc=85.613, loss_att=62.031, acc=0.648, loss=69.106, backward_time=0.772, grad_norm=102.067, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.358e-04, train_time=2.273 +[gpua014:0/64] 2023-07-03 14:31:11,252 (trainer:732) INFO: 8epoch:train:7801-7900batch: iter_time=7.179e-04, forward_time=0.117, loss_ctc=87.400, loss_att=64.545, acc=0.674, loss=71.401, backward_time=0.792, grad_norm=90.161, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.357e-04, train_time=2.464 +[gpua014:0/64] 2023-07-03 14:33:10,144 (trainer:732) INFO: 8epoch:train:7901-8000batch: iter_time=8.795e-05, forward_time=0.106, loss_ctc=71.367, loss_att=55.731, acc=0.661, loss=60.421, backward_time=0.768, grad_norm=76.860, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.356e-04, train_time=2.378 +[gpua014:0/64] 2023-07-03 14:33:25,369 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua014:0/64] 2023-07-03 14:33:47,771 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 14:33:52,129 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 14:33:52,129 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpua014:0/64] 2023-07-03 14:33:52,137 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 14:40:38,201 (trainer:732) INFO: 8epoch:train:8001-8100batch: iter_time=2.484, forward_time=0.149, loss_ctc=68.712, loss_att=53.165, acc=0.668, loss=57.829, backward_time=0.764, grad_norm=80.528, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.115, optim0_lr0=1.355e-04, train_time=8.961 +[gpua014:0/64] 2023-07-03 14:42:18,408 (trainer:732) INFO: 8epoch:train:8101-8200batch: iter_time=1.097e-04, forward_time=0.107, loss_ctc=81.164, loss_att=60.410, acc=0.665, loss=66.637, backward_time=0.751, grad_norm=85.474, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.354e-04, train_time=2.004 +[gpua014:0/64] 2023-07-03 14:43:58,521 (trainer:732) INFO: 8epoch:train:8201-8300batch: iter_time=1.187e-04, forward_time=0.107, loss_ctc=74.739, loss_att=57.756, acc=0.652, loss=62.851, backward_time=0.752, grad_norm=82.426, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.353e-04, train_time=2.002 +[gpua014:0/64] 2023-07-03 14:45:38,433 (trainer:732) INFO: 
8epoch:train:8301-8400batch: iter_time=1.158e-04, forward_time=0.107, loss_ctc=81.923, loss_att=64.584, acc=0.670, loss=69.786, backward_time=0.751, grad_norm=91.957, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.352e-04, train_time=1.998 +[gpua014:0/64] 2023-07-03 14:47:18,296 (trainer:732) INFO: 8epoch:train:8401-8500batch: iter_time=1.168e-04, forward_time=0.107, loss_ctc=84.705, loss_att=73.219, acc=0.647, loss=76.665, backward_time=0.752, grad_norm=100.698, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.351e-04, train_time=1.997 +[gpua014:0/64] 2023-07-03 14:49:00,052 (trainer:732) INFO: 8epoch:train:8501-8600batch: iter_time=1.157e-04, forward_time=0.107, loss_ctc=88.293, loss_att=65.188, acc=0.655, loss=72.120, backward_time=0.753, grad_norm=96.122, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.350e-04, train_time=2.035 +[gpua014:0/64] 2023-07-03 14:50:43,788 (trainer:732) INFO: 8epoch:train:8601-8700batch: iter_time=1.141e-04, forward_time=0.107, loss_ctc=75.649, loss_att=58.224, acc=0.669, loss=63.452, backward_time=0.752, grad_norm=75.790, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.349e-04, train_time=2.075 +[gpua014:0/64] 2023-07-03 14:52:39,154 (trainer:732) INFO: 8epoch:train:8701-8800batch: iter_time=1.143e-04, forward_time=0.107, loss_ctc=85.753, loss_att=61.870, acc=0.652, loss=69.035, backward_time=0.771, grad_norm=99.146, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.348e-04, train_time=2.307 +[gpua014:0/64] 2023-07-03 14:54:24,402 (trainer:732) INFO: 8epoch:train:8801-8900batch: iter_time=1.138e-04, forward_time=0.107, loss_ctc=85.164, loss_att=62.906, acc=0.678, loss=69.584, backward_time=0.776, grad_norm=90.026, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.347e-04, train_time=2.105 +[gpua014:0/64] 2023-07-03 14:56:04,184 (trainer:732) INFO: 8epoch:train:8901-9000batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=69.672, loss_att=55.112, acc=0.661, loss=59.480, backward_time=0.751, grad_norm=69.355, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.346e-04, train_time=1.995 +[gpua014:0/64] 2023-07-03 14:56:22,103 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
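Every iter-factory above reports the same mini-batch summary: N-batch=45593, batch_size=128, mean=128.0, min=128, max=129. That is what fixed-size batching with remainder folding produces: the key file is cut into batches of 128 and the short tail is spread one utterance at a time over the trailing batches, leaving a few batches of 129 instead of one undersized final batch. ESPnet's UnsortedBatchSampler itself is not reproduced in this log, so the following only sketches the arithmetic, with a hypothetical remainder of 40:

# Illustrative arithmetic only; not ESPnet's UnsortedBatchSampler.
def make_batches(keys, batch_size=128):
    n_full = len(keys) // batch_size
    batches = [keys[i * batch_size:(i + 1) * batch_size] for i in range(n_full)]
    # Fold the short remainder into the trailing batches, one key each.
    for j, key in enumerate(keys[n_full * batch_size:]):
        batches[-(j + 1)].append(key)
    return batches

keys = list(range(45593 * 128 + 40))  # 40 is a hypothetical remainder
sizes = [len(b) for b in make_batches(keys)]
print(len(sizes), min(sizes), max(sizes), round(sum(sizes) / len(sizes), 1))
# -> 45593 128 129 128.0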
+[gpua014:0/64] 2023-07-03 14:56:44,408 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 14:56:48,755 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 14:56:48,755 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpua014:0/64] 2023-07-03 14:56:48,762 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 15:03:08,701 (trainer:732) INFO: 8epoch:train:9001-9100batch: iter_time=2.056, forward_time=0.167, loss_ctc=68.726, loss_att=53.032, acc=0.667, loss=57.740, backward_time=0.782, grad_norm=78.336, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.115, optim0_lr0=1.345e-04, train_time=8.490 +[gpua014:0/64] 2023-07-03 15:04:53,568 (trainer:732) INFO: 8epoch:train:9101-9200batch: iter_time=1.171e-04, forward_time=0.108, loss_ctc=80.650, loss_att=59.603, acc=0.669, loss=65.917, backward_time=0.757, grad_norm=89.252, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.344e-04, train_time=2.097 +[gpua014:0/64] 2023-07-03 15:06:37,221 (trainer:732) INFO: 8epoch:train:9201-9300batch: iter_time=1.141e-04, forward_time=0.108, loss_ctc=74.127, loss_att=58.026, acc=0.653, loss=62.857, backward_time=0.755, grad_norm=67.694, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.343e-04, train_time=2.073 +[gpua014:0/64] 2023-07-03 15:08:26,273 (trainer:732) INFO: 8epoch:train:9301-9400batch: iter_time=1.149e-04, forward_time=0.107, loss_ctc=83.723, loss_att=66.524, acc=0.670, loss=71.684, backward_time=0.766, grad_norm=82.671, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.343e-04, train_time=2.181 +[gpua014:0/64] 2023-07-03 15:10:15,946 (trainer:732) INFO: 8epoch:train:9401-9500batch: iter_time=1.135e-04, forward_time=0.107, loss_ctc=82.613, loss_att=72.767, acc=0.646, loss=75.721, backward_time=0.759, grad_norm=92.276, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.342e-04, train_time=2.193 +[gpua014:0/64] 2023-07-03 15:12:19,663 (trainer:732) INFO: 8epoch:train:9501-9600batch: iter_time=1.157e-04, forward_time=0.107, loss_ctc=87.132, loss_att=64.934, acc=0.655, loss=71.593, backward_time=0.785, grad_norm=88.736, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.341e-04, train_time=2.474 +[gpua014:0/64] 2023-07-03 15:14:13,005 (trainer:732) INFO: 8epoch:train:9601-9700batch: iter_time=1.217e-04, forward_time=0.107, loss_ctc=75.814, loss_att=57.986, acc=0.668, loss=63.334, backward_time=0.770, grad_norm=81.673, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.340e-04, train_time=2.267 +[gpua014:0/64] 2023-07-03 15:16:01,610 (trainer:732) INFO: 8epoch:train:9701-9800batch: iter_time=1.059e-04, forward_time=0.108, loss_ctc=83.144, loss_att=61.650, acc=0.651, loss=68.098, backward_time=0.762, grad_norm=89.447, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.339e-04, 
train_time=2.172 +[gpua014:0/64] 2023-07-03 15:18:01,769 (trainer:732) INFO: 8epoch:train:9801-9900batch: iter_time=1.216e-04, forward_time=0.107, loss_ctc=84.532, loss_att=63.464, acc=0.678, loss=69.784, backward_time=0.780, grad_norm=92.656, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.338e-04, train_time=2.403 +[gpua014:0/64] 2023-07-03 15:19:56,478 (trainer:732) INFO: 8epoch:train:9901-10000batch: iter_time=1.031e-04, forward_time=0.107, loss_ctc=69.278, loss_att=54.766, acc=0.663, loss=59.119, backward_time=0.765, grad_norm=70.410, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.337e-04, train_time=2.294 +[gpua014:0/64] 2023-07-03 15:32:08,521 (trainer:338) INFO: 8epoch results: [train] iter_time=0.251, forward_time=0.113, loss_ctc=80.997, loss_att=62.659, acc=0.658, loss=68.160, backward_time=0.795, grad_norm=87.106, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.388e-04, train_time=3.152, time=4 hours, 22 minutes and 59.51 seconds, total_count=50000, gpu_max_cached_mem_GB=37.479, [valid] loss_ctc=57.660, cer_ctc=0.322, loss_att=48.662, acc=0.601, cer=0.466, wer=0.999, loss=51.361, time=5 minutes and 58.26 seconds, total_count=5566, gpu_max_cached_mem_GB=37.479, [att_plot] time=5 minutes and 53.54 seconds, total_count=0, gpu_max_cached_mem_GB=37.479 +[gpua014:0/64] 2023-07-03 15:32:28,085 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua014:0/64] 2023-07-03 15:32:28,089 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/3epoch.pth +[gpua014:0/64] 2023-07-03 15:32:28,106 (trainer:272) INFO: 9/100epoch started. Estimated time to finish: 2 weeks, 2 days and 18 hours +[gpua014:0/64] 2023-07-03 15:32:29,330 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
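Across epoch 8, optim0_lr0 decays smoothly from 1.425e-04 down to 1.337e-04. The experiment name encodes lr2.5e-4 and warmup10k, which points at the standard warmup-then-inverse-square-root schedule, lr(step) = peak_lr * min(step / warmup_steps, sqrt(warmup_steps / step)). The optimizer section of the config is not included in this excerpt, so the sketch below only illustrates that shape, with the peak and warmup taken from the run name:

import math

# Warmup + inverse-square-root decay implied by "lr2.5e-4_warmup10k".
# The actual scheduler config is not shown in this log.
def lr_at(step, peak_lr=2.5e-4, warmup_steps=10_000):
    if step < warmup_steps:
        return peak_lr * step / warmup_steps          # linear warmup
    return peak_lr * math.sqrt(warmup_steps / step)   # ~ step**-0.5 decay

for step in (5_000, 10_000, 20_000, 40_000):
    print(step, f"{lr_at(step):.3e}")
# 5000  1.250e-04  (mid-warmup)
# 10000 2.500e-04  (peak)
# 20000 1.768e-04
# 40000 1.250e-04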
+[gpua014:0/64] 2023-07-03 15:32:51,326 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 15:32:57,543 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 15:32:57,543 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpua014:0/64] 2023-07-03 15:32:57,612 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 15:40:23,595 (trainer:732) INFO: 9epoch:train:1-100batch: iter_time=3.646, forward_time=0.167, loss_ctc=89.655, loss_att=65.568, acc=0.669, loss=72.794, backward_time=0.771, grad_norm=91.383, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.116, optim0_lr0=1.336e-04, train_time=9.496 +[gpua014:0/64] 2023-07-03 15:42:03,884 (trainer:732) INFO: 9epoch:train:101-200batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=76.077, loss_att=58.833, acc=0.645, loss=64.006, backward_time=0.754, grad_norm=92.730, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.335e-04, train_time=2.006 +[gpua014:0/64] 2023-07-03 15:43:43,905 (trainer:732) INFO: 9epoch:train:201-300batch: iter_time=1.263e-04, forward_time=0.107, loss_ctc=82.337, loss_att=66.100, acc=0.666, loss=70.971, backward_time=0.752, grad_norm=91.902, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.334e-04, train_time=2.000 +[gpua014:0/64] 2023-07-03 15:45:23,589 (trainer:732) INFO: 9epoch:train:301-400batch: iter_time=1.044e-04, forward_time=0.107, loss_ctc=69.665, loss_att=53.623, acc=0.654, loss=58.435, backward_time=0.751, grad_norm=86.718, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.333e-04, train_time=1.993 +[gpua014:0/64] 2023-07-03 15:47:05,530 (trainer:732) INFO: 9epoch:train:401-500batch: iter_time=1.123e-04, forward_time=0.107, loss_ctc=82.780, loss_att=67.200, acc=0.651, loss=71.874, backward_time=0.752, grad_norm=90.931, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.332e-04, train_time=2.039 +[gpua014:0/64] 2023-07-03 15:48:47,893 (trainer:732) INFO: 9epoch:train:501-600batch: iter_time=1.460e-04, forward_time=0.106, loss_ctc=76.113, loss_att=63.506, acc=0.647, loss=67.288, backward_time=0.755, grad_norm=95.358, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.331e-04, train_time=2.047 +[gpua014:0/64] 2023-07-03 15:50:36,432 (trainer:732) INFO: 9epoch:train:601-700batch: iter_time=1.492e-04, forward_time=0.107, loss_ctc=68.422, loss_att=50.873, acc=0.670, loss=56.138, backward_time=0.757, grad_norm=84.612, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.330e-04, train_time=2.171 +[gpua014:0/64] 2023-07-03 15:52:37,725 (trainer:732) INFO: 9epoch:train:701-800batch: iter_time=0.009, forward_time=0.205, loss_ctc=85.934, loss_att=70.542, acc=0.659, loss=75.160, backward_time=0.796, grad_norm=101.509, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.122, optim0_lr0=1.329e-04, train_time=2.425 
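Each record also logs clip=100.000 beside grad_norm: gradients are rescaled whenever their global L2 norm exceeds 100, and the grad_norm values above hover just below or occasionally above that threshold (the logged value is the pre-clipping norm). In PyTorch this is a single call; a minimal sketch, assuming only the max_norm=100 visible in the log:

import torch

# Global-norm gradient clipping at the threshold logged as clip=100.000.
model = torch.nn.Linear(8, 8)
loss = model(torch.randn(4, 8)).pow(2).sum()
loss.backward()
# clip_grad_norm_ returns the pre-clipping global norm -- the quantity
# that appears in the log as grad_norm.
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
print(float(total_norm))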
+[gpua014:0/64] 2023-07-03 15:54:21,489 (trainer:732) INFO: 9epoch:train:801-900batch: iter_time=1.404e-04, forward_time=0.108, loss_ctc=83.452, loss_att=62.453, acc=0.673, loss=68.753, backward_time=0.754, grad_norm=74.119, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.328e-04, train_time=2.075 +[gpua014:0/64] 2023-07-03 15:56:18,047 (trainer:732) INFO: 9epoch:train:901-1000batch: iter_time=1.278e-04, forward_time=0.107, loss_ctc=82.426, loss_att=59.341, acc=0.672, loss=66.266, backward_time=0.778, grad_norm=99.810, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.327e-04, train_time=2.331 +[gpua014:0/64] 2023-07-03 15:56:37,176 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua014:0/64] 2023-07-03 15:56:59,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 15:57:03,191 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 15:57:03,191 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpua014:0/64] 2023-07-03 15:57:03,201 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 16:02:03,355 (trainer:732) INFO: 9epoch:train:1001-1100batch: iter_time=2.192, forward_time=0.178, loss_ctc=89.263, loss_att=65.838, acc=0.657, loss=72.866, backward_time=0.778, grad_norm=91.355, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.116, optim0_lr0=1.326e-04, train_time=6.905 +[gpua014:0/64] 2023-07-03 16:04:10,925 (trainer:732) INFO: 9epoch:train:1101-1200batch: iter_time=9.290e-05, forward_time=0.106, loss_ctc=76.178, loss_att=57.534, acc=0.641, loss=63.127, backward_time=0.780, grad_norm=81.265, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.325e-04, train_time=2.552 +[gpua014:0/64] 2023-07-03 16:06:00,839 (trainer:732) INFO: 9epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.106, loss_ctc=82.000, loss_att=64.829, acc=0.662, loss=69.980, backward_time=0.787, grad_norm=87.498, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.325e-04, train_time=2.198 +[gpua014:0/64] 2023-07-03 16:08:01,489 (trainer:732) INFO: 9epoch:train:1301-1400batch: iter_time=9.972e-05, forward_time=0.105, loss_ctc=68.486, loss_att=51.841, acc=0.652, loss=56.835, backward_time=0.791, grad_norm=84.985, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.324e-04, train_time=2.413 +[gpua014:0/64] 2023-07-03 16:09:53,617 (trainer:732) INFO: 9epoch:train:1401-1500batch: iter_time=1.087e-04, forward_time=0.106, loss_ctc=81.114, loss_att=65.917, acc=0.653, loss=70.476, backward_time=0.768, grad_norm=83.242, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.323e-04, train_time=2.242 +[gpua014:0/64] 2023-07-03 16:11:44,620 (trainer:732) INFO: 9epoch:train:1501-1600batch: iter_time=9.726e-05, forward_time=0.106, loss_ctc=74.640, loss_att=63.516, acc=0.648, loss=66.853, backward_time=0.767, 
grad_norm=85.450, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.322e-04, train_time=2.220 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py:481: UserWarning: An error happens at loading "dump/raw/org/ru_open_stt_train/data/format.60/data_wav.ark:982143989" + warnings.warn('An error happens at loading "{}"'.format(ark_name)) +ERROR:root:Error happened with path=exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4, type=kaldi_ark, id=ru_open_stt_public_youtube700_6ee68828cbe6b0f_000000000_000004400_rus_asr +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 510, in train_one_epoch + for iiter, (utt_id, batch) in enumerate( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/reporter.py", line 267, in measure_iter_time + retval = next(iterator) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/iterators/multiple_iter_factory.py", line 35, in build_iter + yield from iter_factory.build_iter(epoch, shuffle) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 628, in __next__ + data = self._next_data() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1333, in _next_data + return self._process_data(data) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data + data.reraise() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_utils.py", line 543, in reraise + raise exception +PermissionError: Caught PermissionError in DataLoader worker process 0. 
+Original Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop + data = fetcher.fetch(index) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch + data = [self.dataset[idx] for idx in possibly_batched_index] + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp> + data = [self.dataset[idx] for idx in possibly_batched_index] + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/dataset.py", line 513, in __getitem__ + value = loader[uid] + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/dataset.py", line 52, in __getitem__ + retval = self.loader[key] + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py", line 479, in __getitem__ + return self._loader(ark_name) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/matio.py", line 235, in load_mat + fd_dict[ark] = open_like_kaldi(ark, "rb") + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py", line 207, in open_like_kaldi + return io.open(name, mode, encoding=encoding) +PermissionError: [Errno 13] Permission denied: 'dump/raw/org/ru_open_stt_train/data/format.60/data_wav.ark' + +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main + return _run_code(code, main_globals, None, + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code + exec(code, run_globals) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module> + main() + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main + S2TTask.main(cmd=cmd) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main + while not ProcessContext(processes, error_queues).join(): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join + raise ProcessExitedException( +torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1 +srun: error: gpua068: task 11: Exited with exit code 1 +gpua018:3479290:3479374 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua096:2182301:2182391 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua096:2182300:2182393 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua014:1504762:1504842 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua014:1504764:1504840 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua014:1504763:1504839 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua041:2383595:2383739 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua096:2182300:2182300 [2] NCCL INFO comm 0x50397010 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua062:3999118:3999195 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua091:1092313:1092414
[2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua091:1092314:1092412 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua018:3479289:3479375 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua093:1851602:1851693 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua014:1504763:1504763 [2] NCCL INFO comm 0x8d97bae0 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua041:2383594:2383742 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua041:2383593:2383741 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua041:2383592:2383740 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpua060:2765423:2765503 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua060:2765421:2765505 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +[W ProcessGroupNCCL.cpp:948] [Rank 26] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 27] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 24] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 25] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpua041:2383594:2383594 [2] NCCL INFO comm 0xb8aa2570 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 30] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpua041:2383593:2383593 [1] NCCL INFO comm 0x4f62e590 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 29] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 28] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. 
This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpua041:2383595:2383595 [3] NCCL INFO comm 0x50d6c2c0 rank 31 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua020:3382569:3382650 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +[W ProcessGroupNCCL.cpp:948] [Rank 31] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 20] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 23] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 22] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 21] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 16] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 18] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 17] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 19] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpua063:1316628:1316714 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua022:3213422:3213505 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua022:3213423:3213502 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua022:3213424:3213503 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua016:2146307:2146402 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua021:3546923:3547008 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua018:3479289:3479289 [2] NCCL INFO comm 0x50f23dd0 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua021:3546922:3547011 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua091:1092313:1092313 [2] NCCL INFO comm 0xb9112ed0 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua018:3479290:3479290 [3] NCCL INFO comm 0xb9d17510 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua014:1504764:1504764 [3] NCCL INFO comm 0x90a5180 rank 3 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua041:2383592:2383592 [0] NCCL INFO comm 0x500f1b90 rank 28 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpua060:2765421:2765421 [1] NCCL INFO comm 0xf88a8d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua022:3213423:3213423 [2] NCCL INFO comm 0xb5a97440 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua093:1851602:1851602 [2] NCCL INFO comm 0x8eeaee30 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua020:3382569:3382569 [3] NCCL INFO comm 0x50adbf90 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua093:1851603:1851603 [3] NCCL INFO comm 0x51d23280 rank 59 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua022:3213422:3213422 [1] NCCL INFO comm 0xba8d1d30 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua096:2182299:2182392 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua096:2182301:2182301 [3] NCCL INFO comm 0xb7ff8a70 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua014:1504762:1504762 [1] NCCL INFO comm 0xa6220c0 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua060:2765423:2765423 [3] NCCL INFO comm 0x8bb0f9c0 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua015:2678599:2678682 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua015:2678601:2678679 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua016:2146307:2146307 [3] NCCL INFO comm 0xb69ce0b0 rank 11 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua088:4022852:4022942 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua022:3213424:3213424 [3] NCCL INFO comm 0x4eeb3510 rank 27 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua020:3382568:3382651 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua021:3546921:3546921 [1] NCCL INFO comm 0xb47c80d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua062:3999117:3999117 [2] NCCL INFO comm 0x5126ca20 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua093:1851601:1851695 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua062:3999118:3999118 [3] NCCL INFO comm 0x4f6c8ad0 rank 39 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua015:2678600:2678600 [2] NCCL INFO comm 0x4fb6eec0 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua063:1316626:1316715 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua063:1316627:1316713 [2] NCCL INFO 
[Service thread] Connection closed by localRank 2
+gpua060:2765422:2765422 [2] NCCL INFO comm 0x50dc0f50 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua021:3546923:3546923 [3] NCCL INFO comm 0x51142940 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua096:2182299:2182299 [1] NCCL INFO comm 0x50cf5510 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua062:3999116:3999116 [1] NCCL INFO comm 0x8302c90 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua091:1092314:1092314 [3] NCCL INFO comm 0x508531a0 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua021:3546922:3546922 [2] NCCL INFO comm 0xb64560d0 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua093:1851601:1851601 [1] NCCL INFO comm 0x8d2d4770 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua091:1092312:1092312 [1] NCCL INFO comm 0xb15c78d0 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua018:3479288:3479288 [1] NCCL INFO comm 0x5176eca0 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua063:1316626:1316626 [1] NCCL INFO comm 0x50a76db0 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua088:4022851:4022851 [2] NCCL INFO comm 0x8b447690 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua088:4022852:4022852 [3] NCCL INFO comm 0xb0a25610 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua015:2678601:2678601 [3] NCCL INFO comm 0x510fc310 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua063:1316628:1316628 [3] NCCL INFO comm 0x50f53420 rank 43 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua016:2146306:2146306 [2] NCCL INFO comm 0xb80c42d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua016:2146305:2146305 [1] NCCL INFO comm 0x505b7da0 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua020:3382568:3382568 [2] NCCL INFO comm 0x4f345750 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua015:2678599:2678599 [1] NCCL INFO comm 0xb6725330 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua063:1316627:1316627 [2] NCCL INFO comm 0x50e1a4d0 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua020:3382567:3382567 [1] NCCL INFO comm 0x50206940 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua088:4022849:4022849 [0] NCCL INFO comm 0x8e555600 rank 48 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua015:2678598:2678598 [0] NCCL INFO comm 0x5031b3d0 rank 4 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua021:3546920:3546920 [0] NCCL INFO comm 0x91f0590 rank 20 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua022:3213421:3213421 [0] NCCL INFO comm 0x5127a110 rank 24 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua091:1092311:1092311 [0] NCCL INFO comm 0xb51c2df0 rank 52 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua062:3999115:3999115 [0] NCCL INFO comm 0x4f5279e0 rank 36 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-2:
+gpua096:2182298:2182298 [0] NCCL INFO comm 0x504ff500 rank 60 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua020:3382566:3382566 [0] NCCL INFO comm 0x4ff54b70 rank 16 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua088:4022850:4022850 [1] NCCL INFO comm 0xa543f510 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua014:1504761:1504761 [0] NCCL INFO comm 0x4fbe12c0 rank 0 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua018:3479287:3479287 [0] NCCL INFO comm 0xa20beed0 rank 12 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua016:2146304:2146304 [0] NCCL INFO comm 0x94649e0 rank 8 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 53] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800146 milliseconds before timing out.
+gpua093:1851600:1851600 [0] NCCL INFO comm 0x9c356d50 rank 56 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 15] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800066 milliseconds before timing out.
+gpua063:1316625:1316625 [0] NCCL INFO comm 0x50020800 rank 40 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 55] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800096 milliseconds before timing out.
+Process SpawnProcess-4:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 59] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800088 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 3] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800061 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 29] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800091 milliseconds before timing out.
+gpua060:2765420:2765420 [0] NCCL INFO comm 0x8ee9a7d0 rank 32 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 30] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800089 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 63] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800063 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 61] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800143 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 28] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800093 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+Process SpawnProcess-3:
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 31] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800090 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 14] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800090 milliseconds before timing out.
+Process SpawnProcess-3:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 62] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800062 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 1] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800053 milliseconds before timing out.
+Process SpawnProcess-3:
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 2] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800062 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 26] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800091 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 13] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800154 milliseconds before timing out.
+Process SpawnProcess-1:
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 48] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800306 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 10] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800153 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 33] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800109 milliseconds before timing out.
+Process SpawnProcess-3:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 54] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800085 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 57] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800178 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 58] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800098 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 34] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800108 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 35] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800113 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 50] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800150 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 6] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800156 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 25] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800108 milliseconds before timing out.
+Process SpawnProcess-2:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 37] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800180 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 21] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800119 milliseconds before timing out.
+Process SpawnProcess-2:
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 17] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800253 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 42] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800177 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 43] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800115 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 51] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800167 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 9] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800211 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 4] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800640 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 60] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800891 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 0] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800910 milliseconds before timing out.
+Process SpawnProcess-2:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 5] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800162 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 41] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800171 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 38] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800084 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 11] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800103 milliseconds before timing out.
+Process SpawnProcess-4:
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 19] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800119 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 52] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800908 milliseconds before timing out.
+Process SpawnProcess-4:
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 27] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800101 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 39] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800092 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 36] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800908 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 18] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800163 milliseconds before timing out.
+Process SpawnProcess-4:
+RuntimeError: [Rank 23] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800112 milliseconds before timing out.
+Process SpawnProcess-1:
+RuntimeError: [Rank 16] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800894 milliseconds before timing out.
+Process SpawnProcess-4:
+Process SpawnProcess-1:
+Process SpawnProcess-2:
+RuntimeError: [Rank 49] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800969 milliseconds before timing out.
+RuntimeError: [Rank 7] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800165 milliseconds before timing out.
+RuntimeError: [Rank 12] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800914 milliseconds before timing out.
+Process SpawnProcess-1:
+RuntimeError: [Rank 40] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1801183 milliseconds before timing out.
+Process SpawnProcess-1:
+Process SpawnProcess-3:
+RuntimeError: [Rank 8] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800956 milliseconds before timing out.
+RuntimeError: [Rank 22] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800114 milliseconds before timing out.
+Process SpawnProcess-1:
+Process SpawnProcess-1:
+RuntimeError: [Rank 24] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800865 milliseconds before timing out.
+RuntimeError: [Rank 56] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1801088 milliseconds before timing out.
+Process SpawnProcess-1:
+RuntimeError: [Rank 20] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800797 milliseconds before timing out.
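Every collective-timeout traceback in this log points at the same call site, espnet2/train/trainer.py line 516, where each rank joins an all_reduce over an iterator_stop flag so that all ranks leave the epoch together. If even one rank stalls or dies before reaching that call, every other rank blocks inside the collective until the watchdog fires at Timeout(ms)=1800000, which matches the 30-minute "ran for 180xxxx milliseconds" pattern in these errors. Below is a minimal, self-contained sketch of that synchronization idiom, not the ESPnet code itself; it uses the CPU gloo backend with 2 processes so it runs anywhere, whereas this job used NCCL with 4 GPUs on each of many nodes.

# Minimal sketch (assumed/simplified, not the ESPnet trainer) of the
# all_reduce(iterator_stop) pattern that times out in the log above.
import os
import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def train_one_epoch(rank: int, steps_on_this_rank: int) -> None:
    # One flag tensor per rank; after the SUM-reduce, a value > 0 means
    # "at least one rank ran out of data", so everyone stops together.
    iterator_stop = torch.tensor(0)
    for step in range(10):
        if step >= steps_on_this_rank:
            iterator_stop.fill_(1)  # this rank's iterator is exhausted
        # Every rank must reach this line each step; a single stalled rank
        # leaves the others blocked here until the backend's timeout fires.
        dist.all_reduce(iterator_stop, dist.ReduceOp.SUM)
        if iterator_stop > 0:
            break
        # ... forward/backward/optimizer step would go here ...
    print(f"rank {rank} stopped after step {step}")

def worker(rank: int, world_size: int) -> None:
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    # gloo keeps the sketch runnable on CPU; the job above used NCCL.
    dist.init_process_group("gloo", rank=rank, world_size=world_size)
    # Uneven data: rank 0 has 3 steps, rank 1 has 10; both stop after 3.
    train_one_epoch(rank, steps_on_this_rank=3 if rank == 0 else 10)
    dist.destroy_process_group()

if __name__ == "__main__":
    mp.spawn(worker, args=(2,), nprocs=2)

Run as a plain script, both ranks stop after rank 0's three steps; that is the cooperative path. The timeouts above are the uncooperative path, where some rank never reaches the all_reduce at all.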
+Process SpawnProcess-1:
+RuntimeError: [Rank 32] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1801293 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+srun: error: gpua014: task 0: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+srun: error: gpua091: task 13: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+srun: error: gpua063: task 10: Exited with exit code 1
+srun: error: gpua060: task 8: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+srun: error: gpua062: task 9: Exited with exit code 1
+srun: error: gpua041: task 7: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+srun: error: gpua096: task 15: Exited with exit code 1
+srun: error: gpua022: task 6: Exited with exit code 1
+srun: error: gpua016: task 2: Exited with exit code 1
+srun: error: gpua015: task 1: Exited with exit code 1
+srun: error: gpua021: task 5: Exited with exit code 1
+srun: error: gpua018: task 3: Exited with exit code 1
+srun: error: gpua093: task 14: Exited with exit code 1
+srun: error: gpua020: task 4: Exited with exit code 1
+srun: error: gpua088: task 12: Exited with exit code 1
+# Accounting: begin_time=1688368916
+# Accounting: end_time=1688420633
+# Accounting: time=51717 threads=1
+# Finished at Mon Jul 3 16:43:53 CDT 2023 with status 1
diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.12.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.12.log
new file mode 100644
index 0000000000000000000000000000000000000000..d0bc4d19d3abe7c316fc8f96e3c1598cd5bb4663
--- /dev/null
+++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.12.log
@@ -0,0 +1,4556 @@
+# Running on gpub001.delta.ncsa.illinois.edu
+# Started at Sun Jul 2 01:35:09 CDT 2023
+# SLURMD_NODENAME=gpub001
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2115302
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x32)'
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2115302
+# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpub[001,009,011-016,031-032,035,037-041,058-059,061,064-068,075,080,083,085,088-091]'
+# SLURM_JOB_NUM_NODES=32
+# SLURM_JOB_PARTITION=gpuA40x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_NNODES=32
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpub[001,009,011-016,031-032,035,037-041,058-059,061,064-068,075,080,083,085,088-091]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1
+# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu
+# SLURM_TASKS_PER_NODE='1(x32)'
+# SLURM_TASK_PID=279842
+# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub001
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75
exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text 
--valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text 
--valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe 
--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method 
file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 
--multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type 
exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
+[gpub001:0/128] 2023-07-02 01:39:06,588 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub001:0/128] 2023-07-02 01:39:08,180 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 128 nodes.
+[gpub001:0/128] 2023-07-02 01:39:08,214 (s2t:483) INFO: Vocabulary size: 50002
+[gpub001:0/128] 2023-07-02 01:39:28,889 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub001:0/128] 2023-07-02 01:39:28,898 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (4): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (5): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (6): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (7): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (8): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out):
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() 
+ ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, 
elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): 
Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
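As a cross-check of the summary above, the layer shapes printed in the repr account for nearly all of the 888.51 M parameters; a back-of-envelope sketch (not ESPnet code), with the small remainder attributed to modules outside this excerpt such as the conv2d frontend:

```python
# Parameter accounting for the repr above (d_model=1024, d_ff=4096, vocab=50002).
d, ff, vocab = 1024, 4096, 50002

attn = 4 * (d * d + d)                  # linear_q/k/v/out, each with bias
ffn = (d * ff + ff) + (ff * d + d)      # w_1 + w_2
enc = attn + ffn + 2 * 2 * d            # + norm1/norm2 (weight and bias)
dec = 2 * attn + ffn + 3 * 2 * d        # self_attn + src_attn + norm1-3

total = 24 * enc + 24 * dec             # the e24/d24 stacks
total += vocab * d                      # decoder embedding
total += 2 * (d * vocab + vocab)        # output_layer and ctc_lo
print(f"{total / 1e6:.1f} M")           # -> 859.1 M of the 888.51 M reported
print(f"{888.51e6 * 4 / 1e9:.2f} GB")   # float32 is 4 B/param -> 3.55 GB, as logged
```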
+[gpub001:0/128] 2023-07-02 01:39:28,898 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub001:0/128] 2023-07-02 01:39:28,898 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
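For reference, the "lr: 2.5e-08" in the AdamW parameter group above is simply the warmup schedule evaluated at the first step. Below is a minimal sketch of the Noam-style rule that WarmupLR applies (formula assumed from the scheduler's standard definition; the constants are the values printed in this log):

```python
# Minimal sketch of the WarmupLR rule (assumed standard Noam-style formula):
# lr(step) = initial_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
def warmup_lr(step: int, initial_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
    return initial_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

print(warmup_lr(1))      # 2.5e-08 -> the "lr: 2.5e-08" printed above
print(warmup_lr(10000))  # 0.00025 -> reaches the configured peak at the end of warmup
print(warmup_lr(40000))  # 0.000125 -> then decays as step**-0.5
```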
+[gpub001:0/128] 2023-07-02 01:39:28,899 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub001:0/128] 2023-07-02 01:39:29,583 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub001:0/128] 2023-07-02 01:39:42,237 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/128] 2023-07-02 01:39:42,389 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/128] 2023-07-02 01:39:42,389 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=506, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/128] 2023-07-02 01:39:42,389 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=506, mean=256.1, min=256, max=257
+[gpub001:0/128] 2023-07-02 01:39:42,865 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/128] 2023-07-02 01:39:43,172 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/128] 2023-07-02 01:39:43,172 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/128] 2023-07-02 01:39:43,173 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+gpub001:279948:279948 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:279948:279948 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:279948:279948 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub001:0/128] 2023-07-02 01:39:49,879 (trainer:284) INFO: 1/100epoch started
+[gpub001:0/128] 2023-07-02 01:39:49,937 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/128] 2023-07-02 01:40:11,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/128] 2023-07-02 01:40:15,587 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"}
+  preprocess: )
+[gpub001:0/128] 2023-07-02 01:40:15,588 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2,
+[gpub001:0/128] 2023-07-02 01:40:15,591 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257
+gpub012:1262644:1262644 [0] NCCL INFO cudaDriverVersion 12010
+gpub012:1262644:1262644 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1262644:1262644 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1262644:1262706 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1262644:1262706 [0] NCCL INFO Using network IB
+gpub012:1262644:1262706 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub012:1262644:1262706 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpub012:1262644:1262706 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub012:1262644:1262706 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub012:1262644:1262706 [0] NCCL INFO Connected all rings
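The per-rank NCCL blocks that follow are easier to track once you note the rank layout this log is consistent with: 128 ranks over 4-GPU nodes, with four consecutive global ranks per node. A tiny hypothetical helper (not part of ESPnet or NCCL):

```python
def rank_to_node_gpu(rank: int, gpus_per_node: int = 4) -> tuple[int, int]:
    """Map a global rank to (node_index, local_gpu), assuming block assignment."""
    return divmod(rank, gpus_per_node)

assert rank_to_node_gpu(12) == (3, 0)    # gpub012 logs ranks 12-15 above
assert rank_to_node_gpu(116) == (29, 0)  # gpub089 logs rank 116, cudaDev 0
```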
+gpub012:1262644:1262706 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Connected all trees +gpub012:1262644:1262706 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub012:1262644:1262706 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub012:1262644:1262706 [0] NCCL INFO comm 0xb67ef980 rank 12 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub067:1289107:1289107 [0] NCCL INFO cudaDriverVersion 12010 +gpub067:1289107:1289107 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1289107:1289107 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1289107:1289169 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1289107:1289169 [0] NCCL INFO Using network IB +gpub067:1289107:1289169 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub067:1289107:1289169 [0] NCCL INFO Trees [0] 89/92/-1->88->80 [1] 89/-1/-1->88->85 +gpub067:1289107:1289169 [0] NCCL INFO Channel 00/0 : 87[c7000] -> 88[7000] [receive] via NET/IB/0 +gpub067:1289107:1289169 [0] NCCL INFO Channel 01/0 : 87[c7000] -> 88[7000] [receive] via NET/IB/0 +gpub067:1289107:1289169 [0] NCCL INFO Channel 00/0 : 88[7000] -> 89[46000] via P2P/IPC +gpub067:1289107:1289169 [0] NCCL INFO Channel 01/0 : 88[7000] -> 89[46000] via P2P/IPC +gpub067:1289107:1289169 [0] NCCL INFO Connected all rings +gpub014:1242932:1242932 [2] NCCL INFO cudaDriverVersion 12010 +gpub014:1242932:1242932 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0> +gpub014:1242932:1242932 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub014:1242932:1242996 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0> +gpub014:1242932:1242996 [2] NCCL INFO Using network IB +gpub014:1242932:1242996 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub014:1242932:1242996 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub014:1242932:1242996 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub014:1242932:1242996 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub014:1242932:1242996 [2] NCCL INFO Connected all rings +gpub014:1242932:1242996 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub014:1242932:1242996 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub064:1376670:1376670 [1] NCCL INFO cudaDriverVersion 12010 +gpub064:1376670:1376670 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.164<0> +gpub064:1376670:1376670 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub064:1376670:1376736 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.164<0> +gpub064:1376670:1376736 [1] NCCL INFO Using network IB +gpub064:1376670:1376736 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub064:1376670:1376736 [1] NCCL INFO Trees [0] 78/-1/-1->77->76 [1] 78/84/-1->77->76 
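A short key for reading the init lines, based on how they pattern in this log (an interpretation, not NCCL documentation):

```python
# "Trees [0] 13/-1/-1->12->8" reads as: in tree 0, rank 12 has child 13
# (unused child slots are -1) and parent 8.
# "22[85000] -> 23[c7000] via P2P/IPC" is an intra-node hop between GPUs
# identified by PCI bus ID; "[send]/[receive] via NET/IB/0" marks an
# inter-node hop over InfiniBand.
# The "comm ... cudaDev N busId X" lines pin down the bus-ID-to-device map:
BUS_ID_TO_CUDA_DEV = {"7000": 0, "46000": 1, "85000": 2, "c7000": 3}
```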
+gpub064:1376670:1376736 [1] NCCL INFO Channel 00/0 : 77[46000] -> 78[85000] via P2P/IPC +gpub064:1376670:1376736 [1] NCCL INFO Channel 01/0 : 77[46000] -> 78[85000] via P2P/IPC +gpub064:1376670:1376736 [1] NCCL INFO Connected all rings +gpub064:1376670:1376736 [1] NCCL INFO Channel 01/0 : 77[46000] -> 84[7000] [send] via NET/IB/0 +gpub064:1376670:1376736 [1] NCCL INFO Channel 01/0 : 84[7000] -> 77[46000] [receive] via NET/IB/0 +gpub037:1358540:1358540 [2] NCCL INFO cudaDriverVersion 12010 +gpub037:1358540:1358540 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1358540:1358540 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1358540:1358598 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1358540:1358598 [2] NCCL INFO Using network IB +gpub037:1358540:1358598 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub037:1358540:1358598 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub037:1358540:1358598 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub037:1358540:1358598 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub037:1358540:1358598 [2] NCCL INFO Connected all rings +gpub037:1358540:1358598 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub037:1358540:1358598 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub012:1262647:1262647 [3] NCCL INFO cudaDriverVersion 12010 +gpub012:1262647:1262647 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0> +gpub012:1262647:1262647 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub012:1262647:1262709 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0> +gpub012:1262647:1262709 [3] NCCL INFO Using network IB +gpub012:1262647:1262709 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub012:1262647:1262709 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub012:1262647:1262709 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub012:1262647:1262709 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub012:1262647:1262709 [3] NCCL INFO Connected all rings +gpub012:1262647:1262709 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub012:1262647:1262709 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub032:2709149:2709149 [1] NCCL INFO cudaDriverVersion 12010 +gpub032:2709149:2709149 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:2709149:2709149 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:2709149:2709215 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:2709149:2709215 [1] NCCL INFO Using network IB +gpub032:2709149:2709215 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub032:2709149:2709215 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub032:2709149:2709215 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub032:2709149:2709215 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub032:2709149:2709215 [1] NCCL INFO Connected all rings +gpub032:2709149:2709215 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub032:2709149:2709215 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub089:1443526:1443526 [0] NCCL INFO cudaDriverVersion 12010 +gpub089:1443526:1443526 [0] NCCL 
INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:1443526:1443526 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:1443526:1443886 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.189<0> +gpub089:1443526:1443886 [0] NCCL INFO Using network IB +gpub089:1443526:1443886 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub089:1443526:1443886 [0] NCCL INFO Trees [0] 117/-1/-1->116->121 [1] 117/112/-1->116->109 +gpub089:1443526:1443886 [0] NCCL INFO Channel 00/0 : 115[c7000] -> 116[7000] [receive] via NET/IB/0 +gpub089:1443526:1443886 [0] NCCL INFO Channel 01/0 : 115[c7000] -> 116[7000] [receive] via NET/IB/0 +gpub089:1443526:1443886 [0] NCCL INFO Channel 00/0 : 116[7000] -> 117[46000] via P2P/IPC +gpub089:1443526:1443886 [0] NCCL INFO Channel 01/0 : 116[7000] -> 117[46000] via P2P/IPC +gpub009:1313739:1313739 [0] NCCL INFO cudaDriverVersion 12010 +gpub009:1313739:1313739 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.109<0> +gpub009:1313739:1313739 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub009:1313739:1313796 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.109<0> +gpub009:1313739:1313796 [0] NCCL INFO Using network IB +gpub009:1313739:1313796 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub009:1313739:1313796 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub009:1313739:1313796 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub009:1313739:1313796 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub009:1313739:1313796 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub009:1313739:1313796 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub009:1313739:1313796 [0] NCCL INFO Connected all rings +gpub039:1773546:1773546 [1] NCCL INFO cudaDriverVersion 12010 +gpub039:1773546:1773546 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0> +gpub039:1773546:1773546 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub039:1773546:1773609 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0> +gpub039:1773546:1773609 [1] NCCL INFO Using network IB +gpub039:1773546:1773609 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub039:1773546:1773609 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub039:1773546:1773609 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub039:1773546:1773609 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub039:1773546:1773609 [1] NCCL INFO Connected all rings +gpub039:1773546:1773609 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub039:1773546:1773609 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub059:1722911:1722911 [2] NCCL INFO cudaDriverVersion 12010 +gpub059:1722911:1722911 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1722911:1722911 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1722911:1722975 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1722911:1722975 [2] NCCL INFO Using network IB +gpub059:1722911:1722975 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub059:1722911:1722975 [2] NCCL INFO Trees [0] 71/-1/-1->70->69 [1] 71/-1/-1->70->69 +gpub059:1722911:1722975 [2] NCCL INFO Channel 00/0 : 
+[... NCCL init messages in the same template for every one of the 128 ranks: each rank (cudaDriverVersion 12010) logs Bootstrap over its eth1 address, NET/IB : Using [0]mlx5_0:1/RoCE [RO], its GPU affinity mask, its Trees topology, Channel 00/0 and 01/0 routes via P2P/IPC within a node and via NET/IB/0 across nodes, "Connected all rings", "Connected all trees", threadThresholds 8/8/64 | 1024/8/64 | 512 | 512, "2 coll channels, 2 p2p channels, 2 p2p channels per peer", and finally "comm <addr> rank <N> nranks 128 cudaDev <D> busId <B> - Init COMPLETE" ...]
+gpub013:1454154:1454216 [2] NCCL INFO comm 0x8c14b260 rank 18 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub065:1317230:1317230 [3] NCCL INFO cudaDriverVersion 12010 +gpub065:1317230:1317230 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.165<0> +gpub065:1317230:1317230 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub065:1317230:1317295 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.165<0> +gpub065:1317230:1317295 [3] NCCL INFO Using network IB +gpub065:1317230:1317295 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub065:1317230:1317295 [3] NCCL INFO Trees [0] -1/-1/-1->83->82 [1] -1/-1/-1->83->82 +gpub065:1317230:1317295 [3] NCCL INFO Channel 00/0 : 83[c7000] -> 84[7000] [send] via NET/IB/0 +gpub065:1317230:1317295 [3] NCCL INFO Channel 01/0 : 83[c7000] -> 84[7000] [send] via NET/IB/0 +gpub065:1317230:1317295 [3] NCCL INFO Connected all rings +gpub065:1317230:1317295 [3] NCCL INFO Channel 00/0 : 83[c7000] -> 82[85000] via P2P/IPC +gpub065:1317230:1317295 [3] NCCL INFO Channel 01/0 : 83[c7000] -> 82[85000] via P2P/IPC +gpub065:1317230:1317295 [3] NCCL INFO Connected all trees +gpub065:1317230:1317295 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub065:1317230:1317295 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub065:1317230:1317295 [3] NCCL INFO comm 0x8ebce5d0 rank 83 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub065:1317229:1317229 [2] NCCL INFO cudaDriverVersion 12010 +gpub065:1317229:1317229 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.165<0> +gpub065:1317229:1317229 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub065:1317229:1317296 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.165<0> +gpub065:1317229:1317296 [2] NCCL INFO Using network IB +gpub065:1317229:1317296 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub065:1317229:1317296 [2] NCCL INFO Trees [0] 83/-1/-1->82->81 [1] 83/-1/-1->82->81 +gpub065:1317229:1317296 [2] NCCL INFO Channel 00/0 : 82[85000] -> 83[c7000] via P2P/IPC +gpub065:1317229:1317296 [2] NCCL INFO Channel 01/0 : 82[85000] -> 83[c7000] via P2P/IPC +gpub065:1317229:1317296 [2] NCCL INFO Connected all rings +gpub065:1317229:1317296 [2] NCCL INFO Channel 00/0 : 82[85000] -> 81[46000] via P2P/IPC +gpub065:1317229:1317296 [2] NCCL INFO Channel 01/0 : 82[85000] -> 81[46000] via P2P/IPC +gpub065:1317229:1317296 [2] NCCL INFO Connected all trees +gpub065:1317229:1317296 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub065:1317229:1317296 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub065:1317229:1317296 [2] NCCL INFO comm 0x50c3e4a0 rank 82 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub013:1454155:1454155 [3] NCCL INFO cudaDriverVersion 12010 +gpub013:1454155:1454155 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1454155:1454155 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1454155:1454218 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1454155:1454218 [3] NCCL INFO Using network IB +gpub013:1454155:1454218 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub013:1454155:1454218 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub013:1454155:1454218 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub013:1454155:1454218 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub013:1454155:1454218 [3] NCCL INFO Connected all rings 
+gpub013:1454155:1454218 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub013:1454155:1454218 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub013:1454155:1454218 [3] NCCL INFO Connected all trees +gpub013:1454155:1454218 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub013:1454155:1454218 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1454155:1454218 [3] NCCL INFO comm 0xaba06b70 rank 19 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub083:326510:326510 [1] NCCL INFO cudaDriverVersion 12010 +gpub083:326510:326510 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:326510:326510 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:326510:326572 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.183<0> +gpub083:326510:326572 [1] NCCL INFO Using network IB +gpub083:326510:326572 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub083:326510:326572 [1] NCCL INFO Trees [0] 106/100/-1->105->104 [1] 106/-1/-1->105->104 +gpub083:326510:326572 [1] NCCL INFO Channel 00/0 : 105[46000] -> 106[85000] via P2P/IPC +gpub083:326510:326572 [1] NCCL INFO Channel 01/0 : 105[46000] -> 106[85000] via P2P/IPC +gpub083:326510:326572 [1] NCCL INFO Connected all rings +gpub083:326510:326572 [1] NCCL INFO Channel 00/0 : 100[7000] -> 105[46000] [receive] via NET/IB/0 +gpub083:326510:326572 [1] NCCL INFO Channel 00/0 : 105[46000] -> 100[7000] [send] via NET/IB/0 +gpub083:326510:326572 [1] NCCL INFO Channel 00/0 : 105[46000] -> 104[7000] via P2P/IPC +gpub083:326510:326572 [1] NCCL INFO Channel 01/0 : 105[46000] -> 104[7000] via P2P/IPC +gpub083:326510:326572 [1] NCCL INFO Connected all trees +gpub083:326510:326572 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub083:326510:326572 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:326510:326572 [1] NCCL INFO comm 0xb489cca0 rank 105 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub065:1317228:1317228 [1] NCCL INFO cudaDriverVersion 12010 +gpub065:1317228:1317228 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.165<0> +gpub065:1317228:1317228 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub065:1317228:1317297 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.165<0> +gpub065:1317228:1317297 [1] NCCL INFO Using network IB +gpub065:1317228:1317297 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub065:1317228:1317297 [1] NCCL INFO Trees [0] 82/72/-1->81->80 [1] 82/-1/-1->81->80 +gpub065:1317228:1317297 [1] NCCL INFO Channel 00/0 : 81[46000] -> 82[85000] via P2P/IPC +gpub065:1317228:1317297 [1] NCCL INFO Channel 01/0 : 81[46000] -> 82[85000] via P2P/IPC +gpub065:1317228:1317297 [1] NCCL INFO Connected all rings +gpub065:1317228:1317297 [1] NCCL INFO Channel 00/0 : 72[7000] -> 81[46000] [receive] via NET/IB/0 +gpub065:1317228:1317297 [1] NCCL INFO Channel 00/0 : 81[46000] -> 72[7000] [send] via NET/IB/0 +gpub065:1317228:1317297 [1] NCCL INFO Channel 00/0 : 81[46000] -> 80[7000] via P2P/IPC +gpub065:1317228:1317297 [1] NCCL INFO Channel 01/0 : 81[46000] -> 80[7000] via P2P/IPC +gpub065:1317228:1317297 [1] NCCL INFO Connected all trees +gpub065:1317228:1317297 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub065:1317228:1317297 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub065:1317228:1317297 [1] NCCL INFO comm 
0x9cb3f50 rank 81 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub013:1454153:1454153 [1] NCCL INFO cudaDriverVersion 12010 +gpub013:1454153:1454153 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1454153:1454153 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1454153:1454219 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1454153:1454219 [1] NCCL INFO Using network IB +gpub013:1454153:1454219 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub013:1454153:1454219 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub013:1454153:1454219 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub013:1454153:1454219 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub013:1454153:1454219 [1] NCCL INFO Connected all rings +gpub013:1454153:1454219 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub013:1454153:1454219 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub013:1454153:1454219 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub013:1454153:1454219 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub013:1454153:1454219 [1] NCCL INFO Connected all trees +gpub013:1454153:1454219 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub013:1454153:1454219 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1454153:1454219 [1] NCCL INFO comm 0x504a7bd0 rank 17 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub075:323056:323056 [1] NCCL INFO cudaDriverVersion 12010 +gpub075:323056:323056 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:323056:323056 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:323056:323126 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:323056:323126 [1] NCCL INFO Using network IB +gpub075:323056:323126 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub075:323056:323126 [1] NCCL INFO Trees [0] 98/80/-1->97->96 [1] 98/-1/-1->97->96 +gpub075:323056:323126 [1] NCCL INFO Channel 00/0 : 97[46000] -> 98[85000] via P2P/IPC +gpub075:323056:323126 [1] NCCL INFO Channel 01/0 : 97[46000] -> 98[85000] via P2P/IPC +gpub075:323056:323126 [1] NCCL INFO Connected all rings +gpub075:323056:323126 [1] NCCL INFO Channel 00/0 : 80[7000] -> 97[46000] [receive] via NET/IB/0 +gpub075:323056:323126 [1] NCCL INFO Channel 00/0 : 97[46000] -> 80[7000] [send] via NET/IB/0 +gpub075:323056:323126 [1] NCCL INFO Channel 00/0 : 97[46000] -> 96[7000] via P2P/IPC +gpub075:323056:323126 [1] NCCL INFO Channel 01/0 : 97[46000] -> 96[7000] via P2P/IPC +gpub075:323056:323126 [1] NCCL INFO Connected all trees +gpub075:323056:323126 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub075:323056:323126 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:323056:323126 [1] NCCL INFO comm 0xa49e4e0 rank 97 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub065:1317227:1317227 [0] NCCL INFO cudaDriverVersion 12010 +gpub065:1317227:1317227 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.165<0> +gpub065:1317227:1317227 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub065:1317227:1317294 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.165<0> +gpub065:1317227:1317294 [0] NCCL INFO Using network IB 
+gpub065:1317227:1317294 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub065:1317227:1317294 [0] NCCL INFO Trees [0] 81/88/-1->80->97 [1] 81/-1/-1->80->84 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 79[c7000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 01/0 : 79[c7000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 80[7000] -> 81[46000] via P2P/IPC +gpub065:1317227:1317294 [0] NCCL INFO Channel 01/0 : 80[7000] -> 81[46000] via P2P/IPC +gpub065:1317227:1317294 [0] NCCL INFO Connected all rings +gpub065:1317227:1317294 [0] NCCL INFO Channel 01/0 : 80[7000] -> 84[7000] [send] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 80[7000] -> 88[7000] [send] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 80[7000] -> 97[46000] [send] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 97[46000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 88[7000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 01/0 : 84[7000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Connected all trees +gpub065:1317227:1317294 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub065:1317227:1317294 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub065:1317227:1317294 [0] NCCL INFO comm 0xb6488400 rank 80 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub068:1244814:1244814 [2] NCCL INFO cudaDriverVersion 12010 +gpub068:1244814:1244814 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.168<0> +gpub068:1244814:1244814 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub068:1244814:1244875 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.168<0> +gpub068:1244814:1244875 [2] NCCL INFO Using network IB +gpub068:1244814:1244875 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub068:1244814:1244875 [2] NCCL INFO Trees [0] 95/-1/-1->94->93 [1] 95/-1/-1->94->93 +gpub068:1244814:1244875 [2] NCCL INFO Channel 00/0 : 94[85000] -> 95[c7000] via P2P/IPC +gpub068:1244814:1244875 [2] NCCL INFO Channel 01/0 : 94[85000] -> 95[c7000] via P2P/IPC +gpub068:1244814:1244875 [2] NCCL INFO Connected all rings +gpub068:1244814:1244875 [2] NCCL INFO Channel 00/0 : 94[85000] -> 93[46000] via P2P/IPC +gpub068:1244814:1244875 [2] NCCL INFO Channel 01/0 : 94[85000] -> 93[46000] via P2P/IPC +gpub068:1244814:1244875 [2] NCCL INFO Connected all trees +gpub068:1244814:1244875 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub068:1244814:1244875 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub068:1244814:1244875 [2] NCCL INFO comm 0xbeb7020 rank 94 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub068:1244815:1244815 [3] NCCL INFO cudaDriverVersion 12010 +gpub068:1244815:1244815 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.168<0> +gpub068:1244815:1244815 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub068:1244815:1244878 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.168<0> +gpub068:1244815:1244878 [3] NCCL INFO Using network IB +gpub068:1244815:1244878 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub068:1244815:1244878 [3] NCCL INFO Trees [0] -1/-1/-1->95->94 [1] -1/-1/-1->95->94 +gpub068:1244815:1244878 [3] NCCL INFO Channel 00/0 : 95[c7000] -> 96[7000] [send] 
via NET/IB/0 +gpub068:1244815:1244878 [3] NCCL INFO Channel 01/0 : 95[c7000] -> 96[7000] [send] via NET/IB/0 +gpub068:1244815:1244878 [3] NCCL INFO Connected all rings +gpub068:1244815:1244878 [3] NCCL INFO Channel 00/0 : 95[c7000] -> 94[85000] via P2P/IPC +gpub068:1244815:1244878 [3] NCCL INFO Channel 01/0 : 95[c7000] -> 94[85000] via P2P/IPC +gpub068:1244815:1244878 [3] NCCL INFO Connected all trees +gpub068:1244815:1244878 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub068:1244815:1244878 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub068:1244815:1244878 [3] NCCL INFO comm 0x8e4e7950 rank 95 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub058:1406058:1406058 [2] NCCL INFO cudaDriverVersion 12010 +gpub058:1406058:1406058 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1406058:1406058 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:1406058:1406115 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.158<0> +gpub058:1406058:1406115 [2] NCCL INFO Using network IB +gpub058:1406058:1406115 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub058:1406058:1406115 [2] NCCL INFO Trees [0] 67/-1/-1->66->65 [1] 67/-1/-1->66->65 +gpub058:1406058:1406115 [2] NCCL INFO Channel 00/0 : 66[85000] -> 67[c7000] via P2P/IPC +gpub058:1406058:1406115 [2] NCCL INFO Channel 01/0 : 66[85000] -> 67[c7000] via P2P/IPC +gpub058:1406058:1406115 [2] NCCL INFO Connected all rings +gpub058:1406058:1406115 [2] NCCL INFO Channel 00/0 : 66[85000] -> 65[46000] via P2P/IPC +gpub058:1406058:1406115 [2] NCCL INFO Channel 01/0 : 66[85000] -> 65[46000] via P2P/IPC +gpub058:1406058:1406115 [2] NCCL INFO Connected all trees +gpub058:1406058:1406115 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub058:1406058:1406115 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1406058:1406115 [2] NCCL INFO comm 0xb9766da0 rank 66 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub001:279948:280013 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:279948:280013 [0] NCCL INFO Using network IB +gpub001:279948:280013 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub001:279948:280013 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:279948:280013 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:279948:280013 [0] NCCL INFO Trees [0] 1/64/-1->0->-1 [1] 1/-1/-1->0->4 +gpub001:279948:280013 [0] NCCL INFO Channel 00/0 : 127[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 01/0 : 127[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:279948:280013 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:279948:280013 [0] NCCL INFO Connected all rings +gpub001:279948:280013 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 00/0 : 64[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 00/0 : 0[7000] -> 64[7000] [send] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Connected all trees +gpub001:279948:280013 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub001:279948:280013 [0] NCCL INFO 2 coll 
channels, 2 p2p channels, 2 p2p channels per peer +gpub001:279948:280013 [0] NCCL INFO comm 0x4fb35be0 rank 0 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub075:323057:323057 [2] NCCL INFO cudaDriverVersion 12010 +gpub075:323057:323057 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:323057:323057 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:323057:323128 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:323057:323128 [2] NCCL INFO Using network IB +gpub075:323057:323128 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub075:323057:323128 [2] NCCL INFO Trees [0] 99/-1/-1->98->97 [1] 99/-1/-1->98->97 +gpub075:323057:323128 [2] NCCL INFO Channel 00/0 : 98[85000] -> 99[c7000] via P2P/IPC +gpub075:323057:323128 [2] NCCL INFO Channel 01/0 : 98[85000] -> 99[c7000] via P2P/IPC +gpub075:323057:323128 [2] NCCL INFO Connected all rings +gpub075:323057:323128 [2] NCCL INFO Channel 00/0 : 98[85000] -> 97[46000] via P2P/IPC +gpub075:323057:323128 [2] NCCL INFO Channel 01/0 : 98[85000] -> 97[46000] via P2P/IPC +gpub075:323057:323128 [2] NCCL INFO Connected all trees +gpub075:323057:323128 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub075:323057:323128 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:323057:323128 [2] NCCL INFO comm 0xb913a860 rank 98 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub061:1342467:1342467 [1] NCCL INFO cudaDriverVersion 12010 +gpub061:1342467:1342467 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:1342467:1342467 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:1342467:1342541 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.161<0> +gpub061:1342467:1342541 [1] NCCL INFO Using network IB +gpub061:1342467:1342541 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub061:1342467:1342541 [1] NCCL INFO Trees [0] 74/68/-1->73->72 [1] 74/-1/-1->73->72 +gpub061:1342467:1342541 [1] NCCL INFO Channel 00/0 : 73[46000] -> 74[85000] via P2P/IPC +gpub061:1342467:1342541 [1] NCCL INFO Channel 01/0 : 73[46000] -> 74[85000] via P2P/IPC +gpub061:1342467:1342541 [1] NCCL INFO Connected all rings +gpub061:1342467:1342541 [1] NCCL INFO Channel 00/0 : 68[7000] -> 73[46000] [receive] via NET/IB/0 +gpub061:1342467:1342541 [1] NCCL INFO Channel 00/0 : 73[46000] -> 68[7000] [send] via NET/IB/0 +gpub061:1342467:1342541 [1] NCCL INFO Channel 00/0 : 73[46000] -> 72[7000] via P2P/IPC +gpub061:1342467:1342541 [1] NCCL INFO Channel 01/0 : 73[46000] -> 72[7000] via P2P/IPC +gpub061:1342467:1342541 [1] NCCL INFO Connected all trees +gpub061:1342467:1342541 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub061:1342467:1342541 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:1342467:1342541 [1] NCCL INFO comm 0x50772b20 rank 73 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:279950:279950 [2] NCCL INFO cudaDriverVersion 12010 +gpub001:279950:279950 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:279950:279950 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:279950:280016 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:279950:280016 [2] NCCL INFO Using network IB +gpub001:279950:280016 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub001:279950:280016 [2] NCCL INFO Trees 
[0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub001:279950:280016 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:279950:280016 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:279950:280016 [2] NCCL INFO Connected all rings +gpub001:279950:280016 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:279950:280016 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:279950:280016 [2] NCCL INFO Connected all trees +gpub001:279950:280016 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub001:279950:280016 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:279950:280016 [2] NCCL INFO comm 0x8c644f40 rank 2 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub091:1688862:1688862 [0] NCCL INFO cudaDriverVersion 12010 +gpub091:1688862:1688862 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:1688862:1688862 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:1688862:1688930 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:1688862:1688930 [0] NCCL INFO Using network IB +gpub091:1688862:1688930 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub091:1688862:1688930 [0] NCCL INFO Trees [0] 125/-1/-1->124->120 [1] 125/60/-1->124->-1 +gpub091:1688862:1688930 [0] NCCL INFO Channel 00/0 : 123[c7000] -> 124[7000] [receive] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 01/0 : 123[c7000] -> 124[7000] [receive] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 00/0 : 124[7000] -> 125[46000] via P2P/IPC +gpub091:1688862:1688930 [0] NCCL INFO Channel 01/0 : 124[7000] -> 125[46000] via P2P/IPC +gpub091:1688862:1688930 [0] NCCL INFO Connected all rings +gpub091:1688862:1688930 [0] NCCL INFO Channel 00/0 : 120[7000] -> 124[7000] [receive] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 01/0 : 60[7000] -> 124[7000] [receive] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 01/0 : 124[7000] -> 60[7000] [send] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 00/0 : 124[7000] -> 120[7000] [send] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Connected all trees +gpub091:1688862:1688930 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub091:1688862:1688930 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub091:1688862:1688930 [0] NCCL INFO comm 0xb5c78140 rank 124 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub061:1342466:1342466 [0] NCCL INFO cudaDriverVersion 12010 +gpub061:1342466:1342466 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:1342466:1342466 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:1342466:1342539 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.161<0> +gpub061:1342466:1342539 [0] NCCL INFO Using network IB +gpub061:1342466:1342539 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub061:1342466:1342539 [0] NCCL INFO Trees [0] 73/76/-1->72->81 [1] 73/-1/-1->72->69 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 71[c7000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 01/0 : 71[c7000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 72[7000] -> 73[46000] via P2P/IPC +gpub061:1342466:1342539 [0] NCCL INFO Channel 01/0 : 72[7000] -> 73[46000] via P2P/IPC +gpub061:1342466:1342539 [0] 
NCCL INFO Connected all rings +gpub061:1342466:1342539 [0] NCCL INFO Channel 01/0 : 69[46000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 72[7000] -> 76[7000] [send] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 72[7000] -> 81[46000] [send] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 81[46000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 76[7000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 01/0 : 72[7000] -> 69[46000] [send] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Connected all trees +gpub061:1342466:1342539 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub061:1342466:1342539 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:1342466:1342539 [0] NCCL INFO comm 0x8c41510 rank 72 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub090:1179805:1179805 [2] NCCL INFO cudaDriverVersion 12010 +gpub090:1179805:1179805 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:1179805:1179805 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:1179805:1179868 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.190<0> +gpub090:1179805:1179868 [2] NCCL INFO Using network IB +gpub090:1179805:1179868 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub090:1179805:1179868 [2] NCCL INFO Trees [0] 123/-1/-1->122->121 [1] 123/-1/-1->122->121 +gpub090:1179805:1179868 [2] NCCL INFO Channel 00/0 : 122[85000] -> 123[c7000] via P2P/IPC +gpub090:1179805:1179868 [2] NCCL INFO Channel 01/0 : 122[85000] -> 123[c7000] via P2P/IPC +gpub090:1179805:1179868 [2] NCCL INFO Connected all rings +gpub090:1179805:1179868 [2] NCCL INFO Channel 00/0 : 122[85000] -> 121[46000] via P2P/IPC +gpub090:1179805:1179868 [2] NCCL INFO Channel 01/0 : 122[85000] -> 121[46000] via P2P/IPC +gpub075:323058:323058 [3] NCCL INFO cudaDriverVersion 12010 +gpub075:323058:323058 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:323058:323058 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:323058:323127 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:323058:323127 [3] NCCL INFO Using network IB +gpub075:323058:323127 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub075:323058:323127 [3] NCCL INFO Trees [0] -1/-1/-1->99->98 [1] -1/-1/-1->99->98 +gpub075:323058:323127 [3] NCCL INFO Channel 00/0 : 99[c7000] -> 100[7000] [send] via NET/IB/0 +gpub075:323058:323127 [3] NCCL INFO Channel 01/0 : 99[c7000] -> 100[7000] [send] via NET/IB/0 +gpub075:323058:323127 [3] NCCL INFO Connected all rings +gpub075:323058:323127 [3] NCCL INFO Channel 00/0 : 99[c7000] -> 98[85000] via P2P/IPC +gpub075:323058:323127 [3] NCCL INFO Channel 01/0 : 99[c7000] -> 98[85000] via P2P/IPC +gpub090:1179805:1179868 [2] NCCL INFO Connected all trees +gpub090:1179805:1179868 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub090:1179805:1179868 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:1179805:1179868 [2] NCCL INFO comm 0x50f1c1b0 rank 122 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub075:323058:323127 [3] NCCL INFO Connected all trees +gpub075:323058:323127 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub075:323058:323127 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
+gpub075:323058:323127 [3] NCCL INFO comm 0x50ae89c0 rank 99 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub091:1688864:1688864 [2] NCCL INFO cudaDriverVersion 12010 +gpub091:1688864:1688864 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:1688864:1688864 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:1688864:1688932 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:1688864:1688932 [2] NCCL INFO Using network IB +gpub091:1688864:1688932 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub091:1688864:1688932 [2] NCCL INFO Trees [0] 127/-1/-1->126->125 [1] 127/-1/-1->126->125 +gpub091:1688864:1688932 [2] NCCL INFO Channel 00/0 : 126[85000] -> 127[c7000] via P2P/IPC +gpub091:1688864:1688932 [2] NCCL INFO Channel 01/0 : 126[85000] -> 127[c7000] via P2P/IPC +gpub091:1688864:1688932 [2] NCCL INFO Connected all rings +gpub091:1688864:1688932 [2] NCCL INFO Channel 00/0 : 126[85000] -> 125[46000] via P2P/IPC +gpub091:1688864:1688932 [2] NCCL INFO Channel 01/0 : 126[85000] -> 125[46000] via P2P/IPC +gpub091:1688864:1688932 [2] NCCL INFO Connected all trees +gpub091:1688864:1688932 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub091:1688864:1688932 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub091:1688864:1688932 [2] NCCL INFO comm 0x51d52410 rank 126 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub058:1406059:1406059 [3] NCCL INFO cudaDriverVersion 12010 +gpub058:1406059:1406059 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1406059:1406059 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:1406059:1406116 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.158<0> +gpub058:1406059:1406116 [3] NCCL INFO Using network IB +gpub058:1406059:1406116 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub058:1406059:1406116 [3] NCCL INFO Trees [0] -1/-1/-1->67->66 [1] -1/-1/-1->67->66 +gpub058:1406059:1406116 [3] NCCL INFO Channel 00/0 : 67[c7000] -> 68[7000] [send] via NET/IB/0 +gpub058:1406059:1406116 [3] NCCL INFO Channel 01/0 : 67[c7000] -> 68[7000] [send] via NET/IB/0 +gpub058:1406059:1406116 [3] NCCL INFO Connected all rings +gpub058:1406059:1406116 [3] NCCL INFO Channel 00/0 : 67[c7000] -> 66[85000] via P2P/IPC +gpub058:1406059:1406116 [3] NCCL INFO Channel 01/0 : 67[c7000] -> 66[85000] via P2P/IPC +gpub058:1406059:1406116 [3] NCCL INFO Connected all trees +gpub058:1406059:1406116 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub058:1406059:1406116 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1406059:1406116 [3] NCCL INFO comm 0x5127edf0 rank 67 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub061:1342468:1342468 [2] NCCL INFO cudaDriverVersion 12010 +gpub061:1342468:1342468 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:1342468:1342468 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:1342468:1342540 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.161<0> +gpub061:1342468:1342540 [2] NCCL INFO Using network IB +gpub061:1342468:1342540 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub061:1342468:1342540 [2] NCCL INFO Trees [0] 75/-1/-1->74->73 [1] 75/-1/-1->74->73 +gpub061:1342468:1342540 [2] NCCL INFO Channel 00/0 : 74[85000] -> 75[c7000] via P2P/IPC +gpub061:1342468:1342540 [2] NCCL INFO Channel 
01/0 : 74[85000] -> 75[c7000] via P2P/IPC +gpub061:1342468:1342540 [2] NCCL INFO Connected all rings +gpub061:1342468:1342540 [2] NCCL INFO Channel 00/0 : 74[85000] -> 73[46000] via P2P/IPC +gpub061:1342468:1342540 [2] NCCL INFO Channel 01/0 : 74[85000] -> 73[46000] via P2P/IPC +gpub061:1342468:1342540 [2] NCCL INFO Connected all trees +gpub061:1342468:1342540 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub061:1342468:1342540 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:1342468:1342540 [2] NCCL INFO comm 0xa2ca84b0 rank 74 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub061:1342469:1342469 [3] NCCL INFO cudaDriverVersion 12010 +gpub061:1342469:1342469 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:1342469:1342469 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:1342469:1342542 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.161<0> +gpub061:1342469:1342542 [3] NCCL INFO Using network IB +gpub061:1342469:1342542 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub061:1342469:1342542 [3] NCCL INFO Trees [0] -1/-1/-1->75->74 [1] -1/-1/-1->75->74 +gpub061:1342469:1342542 [3] NCCL INFO Channel 00/0 : 75[c7000] -> 76[7000] [send] via NET/IB/0 +gpub061:1342469:1342542 [3] NCCL INFO Channel 01/0 : 75[c7000] -> 76[7000] [send] via NET/IB/0 +gpub061:1342469:1342542 [3] NCCL INFO Connected all rings +gpub061:1342469:1342542 [3] NCCL INFO Channel 00/0 : 75[c7000] -> 74[85000] via P2P/IPC +gpub061:1342469:1342542 [3] NCCL INFO Channel 01/0 : 75[c7000] -> 74[85000] via P2P/IPC +gpub075:323055:323055 [0] NCCL INFO cudaDriverVersion 12010 +gpub075:323055:323055 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:323055:323055 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:323055:323125 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:323055:323125 [0] NCCL INFO Using network IB +gpub075:323055:323125 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub075:323055:323125 [0] NCCL INFO Trees [0] 97/112/-1->96->64 [1] 97/-1/-1->96->100 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 95[c7000] -> 96[7000] [receive] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 01/0 : 95[c7000] -> 96[7000] [receive] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 96[7000] -> 97[46000] via P2P/IPC +gpub075:323055:323125 [0] NCCL INFO Channel 01/0 : 96[7000] -> 97[46000] via P2P/IPC +gpub075:323055:323125 [0] NCCL INFO Connected all rings +gpub061:1342469:1342542 [3] NCCL INFO Connected all trees +gpub061:1342469:1342542 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub061:1342469:1342542 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:1342469:1342542 [3] NCCL INFO comm 0xb6128330 rank 75 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub075:323055:323125 [0] NCCL INFO Channel 01/0 : 96[7000] -> 100[7000] [send] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 96[7000] -> 112[7000] [send] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 64[7000] -> 96[7000] [receive] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 96[7000] -> 64[7000] [send] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 112[7000] -> 96[7000] [receive] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 01/0 : 100[7000] -> 96[7000] [receive] 
via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Connected all trees +gpub075:323055:323125 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub075:323055:323125 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:323055:323125 [0] NCCL INFO comm 0x4fef3c60 rank 96 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub090:1179803:1179803 [0] NCCL INFO cudaDriverVersion 12010 +gpub090:1179803:1179803 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:1179803:1179803 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:1179803:1179871 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.190<0> +gpub090:1179803:1179871 [0] NCCL INFO Using network IB +gpub090:1179803:1179871 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub090:1179803:1179871 [0] NCCL INFO Trees [0] 121/124/-1->120->112 [1] 121/-1/-1->120->117 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 119[c7000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 01/0 : 119[c7000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 120[7000] -> 121[46000] via P2P/IPC +gpub090:1179803:1179871 [0] NCCL INFO Channel 01/0 : 120[7000] -> 121[46000] via P2P/IPC +gpub090:1179803:1179871 [0] NCCL INFO Connected all rings +gpub090:1179803:1179871 [0] NCCL INFO Channel 01/0 : 117[46000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 120[7000] -> 124[7000] [send] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 112[7000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 120[7000] -> 112[7000] [send] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 124[7000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 01/0 : 120[7000] -> 117[46000] [send] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Connected all trees +gpub090:1179803:1179871 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub090:1179803:1179871 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:1179803:1179871 [0] NCCL INFO comm 0x521aa9c0 rank 120 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub080:3990802:3990802 [3] NCCL INFO cudaDriverVersion 12010 +gpub080:3990802:3990802 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:3990802:3990802 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:3990802:3990869 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:3990802:3990869 [3] NCCL INFO Using network IB +gpub080:3990802:3990869 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub080:3990802:3990869 [3] NCCL INFO Trees [0] -1/-1/-1->103->102 [1] -1/-1/-1->103->102 +gpub080:3990802:3990869 [3] NCCL INFO Channel 00/0 : 103[c7000] -> 104[7000] [send] via NET/IB/0 +gpub080:3990802:3990869 [3] NCCL INFO Channel 01/0 : 103[c7000] -> 104[7000] [send] via NET/IB/0 +gpub080:3990802:3990869 [3] NCCL INFO Connected all rings +gpub080:3990802:3990869 [3] NCCL INFO Channel 00/0 : 103[c7000] -> 102[85000] via P2P/IPC +gpub080:3990802:3990869 [3] NCCL INFO Channel 01/0 : 103[c7000] -> 102[85000] via P2P/IPC +gpub080:3990802:3990869 [3] NCCL INFO Connected all trees +gpub080:3990802:3990869 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub080:3990802:3990869 [3] NCCL INFO 2 coll 
channels, 2 p2p channels, 2 p2p channels per peer +gpub080:3990802:3990869 [3] NCCL INFO comm 0xb6ed4500 rank 103 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub090:1179804:1179804 [1] NCCL INFO cudaDriverVersion 12010 +gpub090:1179804:1179804 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:1179804:1179804 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:1179804:1179870 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.190<0> +gpub090:1179804:1179870 [1] NCCL INFO Using network IB +gpub090:1179804:1179870 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub090:1179804:1179870 [1] NCCL INFO Trees [0] 122/116/-1->121->120 [1] 122/-1/-1->121->120 +gpub090:1179804:1179870 [1] NCCL INFO Channel 00/0 : 121[46000] -> 122[85000] via P2P/IPC +gpub090:1179804:1179870 [1] NCCL INFO Channel 01/0 : 121[46000] -> 122[85000] via P2P/IPC +gpub090:1179804:1179870 [1] NCCL INFO Connected all rings +gpub090:1179804:1179870 [1] NCCL INFO Channel 00/0 : 116[7000] -> 121[46000] [receive] via NET/IB/0 +gpub090:1179804:1179870 [1] NCCL INFO Channel 00/0 : 121[46000] -> 116[7000] [send] via NET/IB/0 +gpub090:1179804:1179870 [1] NCCL INFO Channel 00/0 : 121[46000] -> 120[7000] via P2P/IPC +gpub090:1179804:1179870 [1] NCCL INFO Channel 01/0 : 121[46000] -> 120[7000] via P2P/IPC +gpub090:1179804:1179870 [1] NCCL INFO Connected all trees +gpub090:1179804:1179870 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub090:1179804:1179870 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:1179804:1179870 [1] NCCL INFO comm 0x50af0d60 rank 121 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub091:1688863:1688863 [1] NCCL INFO cudaDriverVersion 12010 +gpub091:1688863:1688863 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:1688863:1688863 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:1688863:1688931 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:1688863:1688931 [1] NCCL INFO Using network IB +gpub091:1688863:1688931 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub091:1688863:1688931 [1] NCCL INFO Trees [0] 126/-1/-1->125->124 [1] 126/-1/-1->125->124 +gpub091:1688863:1688931 [1] NCCL INFO Channel 00/0 : 125[46000] -> 126[85000] via P2P/IPC +gpub091:1688863:1688931 [1] NCCL INFO Channel 01/0 : 125[46000] -> 126[85000] via P2P/IPC +gpub091:1688863:1688931 [1] NCCL INFO Connected all rings +gpub091:1688863:1688931 [1] NCCL INFO Channel 00/0 : 125[46000] -> 124[7000] via P2P/IPC +gpub091:1688863:1688931 [1] NCCL INFO Channel 01/0 : 125[46000] -> 124[7000] via P2P/IPC +gpub091:1688863:1688931 [1] NCCL INFO Connected all trees +gpub091:1688863:1688931 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub091:1688863:1688931 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub091:1688863:1688931 [1] NCCL INFO comm 0x50bc8450 rank 125 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub090:1179806:1179806 [3] NCCL INFO cudaDriverVersion 12010 +gpub090:1179806:1179806 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:1179806:1179806 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:1179806:1179869 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.190<0> +gpub090:1179806:1179869 [3] NCCL INFO Using network IB +gpub090:1179806:1179869 [3] NCCL 
INFO Setting affinity for GPU 3 to ffff +gpub090:1179806:1179869 [3] NCCL INFO Trees [0] -1/-1/-1->123->122 [1] -1/-1/-1->123->122 +gpub090:1179806:1179869 [3] NCCL INFO Channel 00/0 : 123[c7000] -> 124[7000] [send] via NET/IB/0 +gpub090:1179806:1179869 [3] NCCL INFO Channel 01/0 : 123[c7000] -> 124[7000] [send] via NET/IB/0 +gpub090:1179806:1179869 [3] NCCL INFO Connected all rings +gpub090:1179806:1179869 [3] NCCL INFO Channel 00/0 : 123[c7000] -> 122[85000] via P2P/IPC +gpub090:1179806:1179869 [3] NCCL INFO Channel 01/0 : 123[c7000] -> 122[85000] via P2P/IPC +gpub090:1179806:1179869 [3] NCCL INFO Connected all trees +gpub090:1179806:1179869 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub090:1179806:1179869 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:1179806:1179869 [3] NCCL INFO comm 0x5049f1b0 rank 123 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub058:1406057:1406057 [1] NCCL INFO cudaDriverVersion 12010 +gpub058:1406057:1406057 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1406057:1406057 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:1406057:1406117 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.158<0> +gpub058:1406057:1406117 [1] NCCL INFO Using network IB +gpub058:1406057:1406117 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub058:1406057:1406117 [1] NCCL INFO Trees [0] 66/32/-1->65->64 [1] 66/-1/-1->65->64 +gpub058:1406057:1406117 [1] NCCL INFO Channel 00/0 : 65[46000] -> 66[85000] via P2P/IPC +gpub058:1406057:1406117 [1] NCCL INFO Channel 01/0 : 65[46000] -> 66[85000] via P2P/IPC +gpub058:1406057:1406117 [1] NCCL INFO Connected all rings +gpub058:1406057:1406117 [1] NCCL INFO Channel 00/0 : 32[7000] -> 65[46000] [receive] via NET/IB/0 +gpub058:1406057:1406117 [1] NCCL INFO Channel 00/0 : 65[46000] -> 32[7000] [send] via NET/IB/0 +gpub058:1406057:1406117 [1] NCCL INFO Channel 00/0 : 65[46000] -> 64[7000] via P2P/IPC +gpub058:1406057:1406117 [1] NCCL INFO Channel 01/0 : 65[46000] -> 64[7000] via P2P/IPC +gpub058:1406057:1406117 [1] NCCL INFO Connected all trees +gpub058:1406057:1406117 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub058:1406057:1406117 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1406057:1406117 [1] NCCL INFO comm 0x503d1e40 rank 65 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub091:1688865:1688865 [3] NCCL INFO cudaDriverVersion 12010 +gpub091:1688865:1688865 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:1688865:1688865 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:1688865:1688929 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:1688865:1688929 [3] NCCL INFO Using network IB +gpub091:1688865:1688929 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub091:1688865:1688929 [3] NCCL INFO Trees [0] -1/-1/-1->127->126 [1] -1/-1/-1->127->126 +gpub091:1688865:1688929 [3] NCCL INFO Channel 00/0 : 127[c7000] -> 0[7000] [send] via NET/IB/0 +gpub091:1688865:1688929 [3] NCCL INFO Channel 01/0 : 127[c7000] -> 0[7000] [send] via NET/IB/0 +gpub091:1688865:1688929 [3] NCCL INFO Connected all rings +gpub091:1688865:1688929 [3] NCCL INFO Channel 00/0 : 127[c7000] -> 126[85000] via P2P/IPC +gpub091:1688865:1688929 [3] NCCL INFO Channel 01/0 : 127[c7000] -> 126[85000] via P2P/IPC +gpub091:1688865:1688929 [3] NCCL INFO Connected all trees 
+gpub091:1688865:1688929 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub091:1688865:1688929 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub091:1688865:1688929 [3] NCCL INFO comm 0x8ce193d0 rank 127 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub001:279949:279949 [1] NCCL INFO cudaDriverVersion 12010
+gpub001:279949:279949 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:279949:279949 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:279949:280015 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:279949:280015 [1] NCCL INFO Using network IB
+gpub001:279949:280015 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub001:279949:280015 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpub001:279949:280015 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub001:279949:280015 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub001:279949:280015 [1] NCCL INFO Connected all rings
+gpub001:279949:280015 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub001:279949:280015 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub001:279949:280015 [1] NCCL INFO Connected all trees
+gpub001:279949:280015 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub001:279949:280015 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:279949:280015 [1] NCCL INFO comm 0x8b9ccb80 rank 1 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub035:2421441:2421441 [0] NCCL INFO cudaDriverVersion 12010
+gpub035:2421441:2421441 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.135<0>
+gpub035:2421441:2421441 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub035:2421441:2421510 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.135<0>
+gpub035:2421441:2421510 [0] NCCL INFO Using network IB
+gpub035:2421441:2421510 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub035:2421441:2421510 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub035:2421441:2421510 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub035:2421441:2421510 [0] NCCL INFO Connected all rings
+gpub035:2421441:2421510 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Connected all trees
+gpub035:2421441:2421510 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub035:2421441:2421510 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub035:2421441:2421510 [0] NCCL INFO comm 0x8ca3dc20 rank 40 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub001:279951:279951 [3] NCCL INFO cudaDriverVersion 12010
+gpub001:279951:279951 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:279951:279951 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:279951:280014 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:279951:280014 [3] NCCL INFO Using network IB
+gpub001:279951:280014 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub001:279951:280014 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub001:279951:280014 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub001:279951:280014 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub001:279951:280014 [3] NCCL INFO Connected all rings
+gpub001:279951:280014 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub001:279951:280014 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub001:279951:280014 [3] NCCL INFO Connected all trees
+gpub001:279951:280014 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub001:279951:280014 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:279951:280014 [3] NCCL INFO comm 0x8d9eaa40 rank 3 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub085:1471917:1471917 [1] NCCL INFO cudaDriverVersion 12010
+gpub085:1471917:1471917 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0>
+gpub085:1471917:1471917 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub085:1471917:1471987 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.185<0>
+gpub085:1471917:1471987 [1] NCCL INFO Using network IB
+gpub085:1471917:1471987 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub085:1471917:1471987 [1] NCCL INFO Trees [0] 110/-1/-1->109->108 [1] 110/116/-1->109->108
+gpub085:1471917:1471987 [1] NCCL INFO Channel 00/0 : 109[46000] -> 110[85000] via P2P/IPC
+gpub085:1471917:1471987 [1] NCCL INFO Channel 01/0 : 109[46000] -> 110[85000] via P2P/IPC
+gpub085:1471917:1471987 [1] NCCL INFO Connected all rings
+gpub085:1471917:1471987 [1] NCCL INFO Channel 01/0 : 109[46000] -> 116[7000] [send] via NET/IB/0
+gpub085:1471917:1471987 [1] NCCL INFO Channel 01/0 : 116[7000] -> 109[46000] [receive] via NET/IB/0
+gpub085:1471917:1471987 [1] NCCL INFO Channel 00/0 : 109[46000] -> 108[7000] via P2P/IPC
+gpub085:1471917:1471987 [1] NCCL INFO Channel 01/0 : 109[46000] -> 108[7000] via P2P/IPC
+gpub085:1471917:1471987 [1] NCCL INFO Connected all trees
+gpub085:1471917:1471987 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub085:1471917:1471987 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub085:1471917:1471987 [1] NCCL INFO comm 0x9704c80 rank 109 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub085:1471918:1471918 [2] NCCL INFO cudaDriverVersion 12010
+gpub085:1471918:1471918 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0>
+gpub085:1471918:1471918 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub085:1471918:1471989 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.185<0>
+gpub085:1471918:1471989 [2] NCCL INFO Using network IB
+gpub085:1471918:1471989 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub085:1471918:1471989 [2] NCCL INFO Trees [0] 111/-1/-1->110->109 [1] 111/-1/-1->110->109
+gpub085:1471918:1471989 [2] NCCL INFO Channel 00/0 : 110[85000] -> 111[c7000] via P2P/IPC
+gpub085:1471918:1471989 [2] NCCL INFO Channel 01/0 : 110[85000] -> 111[c7000] via P2P/IPC
+gpub085:1471918:1471989 [2] NCCL INFO Connected all rings
+gpub085:1471918:1471989 [2] NCCL INFO Channel 00/0 : 110[85000] -> 109[46000] via P2P/IPC
+gpub085:1471918:1471989 [2] NCCL INFO Channel 01/0 : 110[85000] -> 109[46000] via P2P/IPC
+gpub085:1471918:1471989 [2] NCCL INFO Connected all trees
+gpub085:1471918:1471989 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub085:1471918:1471989 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub085:1471918:1471989 [2] NCCL INFO comm 0x4fbd7c40 rank 110 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub085:1471916:1471916 [0] NCCL INFO cudaDriverVersion 12010
+gpub085:1471916:1471916 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0>
+gpub085:1471916:1471916 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub085:1471916:1471986 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.185<0>
+gpub085:1471916:1471986 [0] NCCL INFO Using network IB
+gpub085:1471916:1471986 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub085:1471916:1471986 [0] NCCL INFO Trees [0] 109/-1/-1->108->104 [1] 109/100/-1->108->93
+gpub085:1471916:1471986 [0] NCCL INFO Channel 00/0 : 107[c7000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 107[c7000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 00/0 : 108[7000] -> 109[46000] via P2P/IPC
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 108[7000] -> 109[46000] via P2P/IPC
+gpub085:1471916:1471986 [0] NCCL INFO Connected all rings
+gpub085:1471916:1471986 [0] NCCL INFO Channel 00/0 : 104[7000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 100[7000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 93[46000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 108[7000] -> 93[46000] [send] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 108[7000] -> 100[7000] [send] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 00/0 : 108[7000] -> 104[7000] [send] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Connected all trees
+gpub085:1471916:1471986 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub085:1471916:1471986 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub085:1471916:1471986 [0] NCCL INFO comm 0xb90085a0 rank 108 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub068:1244812:1244812 [0] NCCL INFO cudaDriverVersion 12010
+gpub068:1244812:1244812 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.168<0>
+gpub068:1244812:1244812 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub068:1244812:1244877 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.168<0>
+gpub068:1244812:1244877 [0] NCCL INFO Using network IB
+gpub068:1244812:1244877 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub068:1244812:1244877 [0] NCCL INFO Trees [0] 93/-1/-1->92->88 [1] 93/76/-1->92->61
+gpub068:1244812:1244877 [0] NCCL INFO Channel 00/0 : 91[c7000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 91[c7000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 00/0 : 92[7000] -> 93[46000] via P2P/IPC
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 92[7000] -> 93[46000] via P2P/IPC
+gpub068:1244812:1244877 [0] NCCL INFO Connected all rings
+gpub068:1244812:1244877 [0] NCCL INFO Channel 00/0 : 88[7000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 76[7000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 61[46000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 92[7000] -> 61[46000] [send] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 92[7000] -> 76[7000] [send] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 00/0 : 92[7000] -> 88[7000] [send] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Connected all trees
+gpub068:1244812:1244877 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub068:1244812:1244877 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub068:1244812:1244877 [0] NCCL INFO comm 0x4fce69e0 rank 92 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub068:1244813:1244813 [1] NCCL INFO cudaDriverVersion 12010
+gpub068:1244813:1244813 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.168<0>
+gpub068:1244813:1244813 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub068:1244813:1244876 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.168<0>
+gpub068:1244813:1244876 [1] NCCL INFO Using network IB
+gpub068:1244813:1244876 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub068:1244813:1244876 [1] NCCL INFO Trees [0] 94/-1/-1->93->92 [1] 94/108/-1->93->92
+gpub068:1244813:1244876 [1] NCCL INFO Channel 00/0 : 93[46000] -> 94[85000] via P2P/IPC
+gpub068:1244813:1244876 [1] NCCL INFO Channel 01/0 : 93[46000] -> 94[85000] via P2P/IPC
+gpub068:1244813:1244876 [1] NCCL INFO Connected all rings
+gpub068:1244813:1244876 [1] NCCL INFO Channel 01/0 : 93[46000] -> 108[7000] [send] via NET/IB/0
+gpub068:1244813:1244876 [1] NCCL INFO Channel 01/0 : 108[7000] -> 93[46000] [receive] via NET/IB/0
+gpub068:1244813:1244876 [1] NCCL INFO Channel 00/0 : 93[46000] -> 92[7000] via P2P/IPC
+gpub068:1244813:1244876 [1] NCCL INFO Channel 01/0 : 93[46000] -> 92[7000] via P2P/IPC
+gpub068:1244813:1244876 [1] NCCL INFO Connected all trees
+gpub068:1244813:1244876 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub068:1244813:1244876 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub068:1244813:1244876 [1] NCCL INFO comm 0x510f96c0 rank 93 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub015:699660:699660 [3] NCCL INFO cudaDriverVersion 12010
+gpub015:699660:699660 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:699660:699660 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:699660:699721 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:699660:699721 [3] NCCL INFO Using network IB
+gpub015:699660:699721 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub015:699660:699721 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpub015:699660:699721 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub015:699660:699721 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub015:699660:699721 [3] NCCL INFO Connected all rings
+gpub015:699660:699721 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub015:699660:699721 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub015:699660:699721 [3] NCCL INFO Connected all trees
+gpub015:699660:699721 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub015:699660:699721 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:699660:699721 [3] NCCL INFO comm 0x4f795830 rank 27 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub085:1471919:1471919 [3] NCCL INFO cudaDriverVersion 12010
+gpub085:1471919:1471919 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0>
+gpub085:1471919:1471919 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub085:1471919:1471988 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.185<0>
+gpub085:1471919:1471988 [3] NCCL INFO Using network IB
+gpub085:1471919:1471988 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub085:1471919:1471988 [3] NCCL INFO Trees [0] -1/-1/-1->111->110 [1] -1/-1/-1->111->110
+gpub085:1471919:1471988 [3] NCCL INFO Channel 00/0 : 111[c7000] -> 112[7000] [send] via NET/IB/0
+gpub085:1471919:1471988 [3] NCCL INFO Channel 01/0 : 111[c7000] -> 112[7000] [send] via NET/IB/0
+gpub085:1471919:1471988 [3] NCCL INFO Connected all rings
+gpub085:1471919:1471988 [3] NCCL INFO Channel 00/0 : 111[c7000] -> 110[85000] via P2P/IPC
+gpub085:1471919:1471988 [3] NCCL INFO Channel 01/0 : 111[c7000] -> 110[85000] via P2P/IPC
+gpub085:1471919:1471988 [3] NCCL INFO Connected all trees
+gpub085:1471919:1471988 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub085:1471919:1471988 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub085:1471919:1471988 [3] NCCL INFO comm 0xb79c1300 rank 111 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub041:1218042:1218042 [3] NCCL INFO cudaDriverVersion 12010
+gpub041:1218042:1218042 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1218042:1218042 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1218042:1218107 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1218042:1218107 [3] NCCL INFO Using network IB
+gpub041:1218042:1218107 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub041:1218042:1218107 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpub041:1218042:1218107 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 64[7000] [send] via NET/IB/0
+gpub041:1218042:1218107 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 64[7000] [send] via NET/IB/0
+gpub041:1218042:1218107 [3] NCCL INFO Connected all rings
+gpub041:1218042:1218107 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub041:1218042:1218107 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub041:1218042:1218107 [3] NCCL INFO Connected all trees
+gpub041:1218042:1218107 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub041:1218042:1218107 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1218042:1218107 [3] NCCL INFO comm 0x94448880 rank 63 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub058:1406056:1406056 [0] NCCL INFO cudaDriverVersion 12010
+gpub058:1406056:1406056 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0>
+gpub058:1406056:1406056 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub058:1406056:1406118 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.158<0>
+gpub058:1406056:1406118 [0] NCCL INFO Using network IB
+gpub058:1406056:1406118 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub058:1406056:1406118 [0] NCCL INFO Trees [0] 65/96/-1->64->0 [1] 65/-1/-1->64->68
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 64[7000] -> 65[46000] via P2P/IPC
+gpub058:1406056:1406118 [0] NCCL INFO Channel 01/0 : 64[7000] -> 65[46000] via P2P/IPC
+gpub058:1406056:1406118 [0] NCCL INFO Connected all rings
+gpub058:1406056:1406118 [0] NCCL INFO Channel 01/0 : 64[7000] -> 68[7000] [send] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 64[7000] -> 96[7000] [send] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 0[7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 64[7000] -> 0[7000] [send] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 96[7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 01/0 : 68[7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Connected all trees
+gpub058:1406056:1406118 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub058:1406056:1406118 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub058:1406056:1406118 [0] NCCL INFO comm 0x9044470 rank 64 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub041:1218040:1218040 [1] NCCL INFO cudaDriverVersion 12010
+gpub041:1218040:1218040 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1218040:1218040 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1218040:1218108 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1218040:1218108 [1] NCCL INFO Using network IB
+gpub041:1218040:1218108 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub041:1218040:1218108 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/92/-1->61->60
+gpub041:1218040:1218108 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub041:1218040:1218108 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub041:1218040:1218108 [1] NCCL INFO Connected all rings
+gpub041:1218040:1218108 [1] NCCL INFO Channel 01/0 : 61[46000] -> 92[7000] [send] via NET/IB/0
+gpub041:1218040:1218108 [1] NCCL INFO Channel 01/0 : 92[7000] -> 61[46000] [receive] via NET/IB/0
+gpub015:699658:699658 [1] NCCL INFO cudaDriverVersion 12010
+gpub015:699658:699658 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:699658:699658 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:699658:699719 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:699658:699719 [1] NCCL INFO Using network IB
+gpub015:699658:699719 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub015:699658:699719 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpub015:699658:699719 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub015:699658:699719 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub015:699658:699719 [1] NCCL INFO Connected all rings
+gpub015:699658:699719 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpub015:699658:699719 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpub015:699658:699719 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub015:699658:699719 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub015:699658:699719 [1] NCCL INFO Connected all trees
+gpub015:699658:699719 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub015:699658:699719 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:699658:699719 [1] NCCL INFO comm 0x4ff5faa0 rank 25 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub041:1218040:1218108 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub041:1218040:1218108 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub041:1218040:1218108 [1] NCCL INFO Connected all trees
+gpub041:1218040:1218108 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub041:1218040:1218108 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1218040:1218108 [1] NCCL INFO comm 0x8e9b8f0 rank 61 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub015:699657:699657 [0] NCCL INFO cudaDriverVersion 12010
+gpub015:699657:699657 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:699657:699657 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:699657:699718 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:699657:699718 [0] NCCL INFO Using network IB
+gpub015:699657:699718 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub015:699657:699718 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub015:699657:699718 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub015:699657:699718 [0] NCCL INFO Connected all rings
+gpub015:699657:699718 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Connected all trees
+gpub015:699657:699718 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub015:699657:699718 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:699657:699718 [0] NCCL INFO comm 0x50bba4a0 rank 24 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub080:3990799:3990799 [0] NCCL INFO cudaDriverVersion 12010
+gpub080:3990799:3990799 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0>
+gpub080:3990799:3990799 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub080:3990799:3990868 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0>
+gpub080:3990799:3990868 [0] NCCL INFO Using network IB
+gpub080:3990799:3990868 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub080:3990799:3990868 [0] NCCL INFO Trees [0] 101/-1/-1->100->105 [1] 101/96/-1->100->108
+gpub080:3990799:3990868 [0] NCCL INFO Channel 00/0 : 99[c7000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 99[c7000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 00/0 : 100[7000] -> 101[46000] via P2P/IPC
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 100[7000] -> 101[46000] via P2P/IPC
+gpub080:3990799:3990868 [0] NCCL INFO Connected all rings
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 96[7000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 00/0 : 100[7000] -> 105[46000] [send] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 100[7000] -> 108[7000] [send] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 108[7000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 00/0 : 105[46000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 100[7000] -> 96[7000] [send] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Connected all trees
+gpub080:3990799:3990868 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub080:3990799:3990868 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub080:3990799:3990868 [0] NCCL INFO comm 0x8be5af20 rank 100 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub015:699659:699659 [2] NCCL INFO cudaDriverVersion 12010
+gpub015:699659:699659 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:699659:699659 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:699659:699720 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:699659:699720 [2] NCCL INFO Using network IB
+gpub015:699659:699720 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub015:699659:699720 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpub015:699659:699720 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub015:699659:699720 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub015:699659:699720 [2] NCCL INFO Connected all rings
+gpub015:699659:699720 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub015:699659:699720 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub015:699659:699720 [2] NCCL INFO Connected all trees
+gpub015:699659:699720 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub015:699659:699720 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:699659:699720 [2] NCCL INFO comm 0x504aba10 rank 26 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub080:3990800:3990800 [1] NCCL INFO cudaDriverVersion 12010
+gpub080:3990800:3990800 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0>
+gpub080:3990800:3990800 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub080:3990800:3990871 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0>
+gpub080:3990800:3990871 [1] NCCL INFO Using network IB
+gpub080:3990800:3990871 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub080:3990800:3990871 [1] NCCL INFO Trees [0] 102/-1/-1->101->100 [1] 102/104/-1->101->100
+gpub080:3990800:3990871 [1] NCCL INFO Channel 00/0 : 101[46000] -> 102[85000] via P2P/IPC
+gpub080:3990800:3990871 [1] NCCL INFO Channel 01/0 : 101[46000] -> 102[85000] via P2P/IPC
+gpub080:3990800:3990871 [1] NCCL INFO Connected all rings
+gpub080:3990800:3990871 [1] NCCL INFO Channel 01/0 : 101[46000] -> 104[7000] [send] via NET/IB/0
+gpub080:3990800:3990871 [1] NCCL INFO Channel 01/0 : 104[7000] -> 101[46000] [receive] via NET/IB/0
+gpub080:3990800:3990871 [1] NCCL INFO Channel 00/0 : 101[46000] -> 100[7000] via P2P/IPC
+gpub080:3990800:3990871 [1] NCCL INFO Channel 01/0 : 101[46000] -> 100[7000] via P2P/IPC
+gpub080:3990800:3990871 [1] NCCL INFO Connected all trees
+gpub080:3990800:3990871 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub080:3990800:3990871 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub080:3990800:3990871 [1] NCCL INFO comm 0xb59713a0 rank 101 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub080:3990801:3990801 [2] NCCL INFO cudaDriverVersion 12010
+gpub080:3990801:3990801 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0>
+gpub080:3990801:3990801 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub080:3990801:3990870 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0>
+gpub080:3990801:3990870 [2] NCCL INFO Using network IB
+gpub080:3990801:3990870 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub080:3990801:3990870 [2] NCCL INFO Trees [0] 103/-1/-1->102->101 [1] 103/-1/-1->102->101
+gpub080:3990801:3990870 [2] NCCL INFO Channel 00/0 : 102[85000] -> 103[c7000] via P2P/IPC
+gpub080:3990801:3990870 [2] NCCL INFO Channel 01/0 : 102[85000] -> 103[c7000] via P2P/IPC
+gpub080:3990801:3990870 [2] NCCL INFO Connected all rings
+gpub080:3990801:3990870 [2] NCCL INFO Channel 00/0 : 102[85000] -> 101[46000] via P2P/IPC
+gpub080:3990801:3990870 [2] NCCL INFO Channel 01/0 : 102[85000] -> 101[46000] via P2P/IPC
+gpub080:3990801:3990870 [2] NCCL INFO Connected all trees
+gpub080:3990801:3990870 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub080:3990801:3990870 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub080:3990801:3990870 [2] NCCL INFO comm 0x52129500 rank 102 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub041:1218039:1218039 [0] NCCL INFO cudaDriverVersion 12010
+gpub041:1218039:1218039 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1218039:1218039 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1218039:1218106 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1218039:1218106 [0] NCCL INFO Using network IB
+gpub041:1218039:1218106 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub041:1218039:1218106 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->124
+gpub041:1218039:1218106 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub041:1218039:1218106 [0] NCCL INFO Connected all rings
+gpub041:1218039:1218106 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 124[7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 60[7000] -> 124[7000] [send] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Connected all trees
+gpub041:1218039:1218106 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub041:1218039:1218106 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1218039:1218106 [0] NCCL INFO comm 0x520cee30 rank 60 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub041:1218041:1218041 [2] NCCL INFO cudaDriverVersion 12010
+gpub041:1218041:1218041 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1218041:1218041 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1218041:1218109 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1218041:1218109 [2] NCCL INFO Using network IB
+gpub041:1218041:1218109 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub041:1218041:1218109 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61
+gpub041:1218041:1218109 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub041:1218041:1218109 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub041:1218041:1218109 [2] NCCL INFO Connected all rings
+gpub041:1218041:1218109 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub041:1218041:1218109 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub041:1218041:1218109 [2] NCCL INFO Connected all trees
+gpub041:1218041:1218109 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub041:1218041:1218109 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1218041:1218109 [2] NCCL INFO comm 0x8dfe3560 rank 62 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub035:2421442:2421442 [1] NCCL INFO cudaDriverVersion 12010
+gpub035:2421442:2421442 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.135<0>
+gpub035:2421442:2421442 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub035:2421442:2421509 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.135<0>
+gpub035:2421442:2421509 [1] NCCL INFO Using network IB
+gpub035:2421442:2421509 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub035:2421442:2421509 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpub035:2421442:2421509 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub035:2421442:2421509 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub035:2421442:2421509 [1] NCCL INFO Connected all rings
+gpub035:2421442:2421509 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0
+gpub035:2421442:2421509 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0
+gpub035:2421442:2421509 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub035:2421442:2421509 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub035:2421442:2421509 [1] NCCL INFO Connected all trees
+gpub035:2421442:2421509 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub035:2421442:2421509 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub035:2421442:2421509 [1] NCCL INFO comm 0x9bdcb70 rank 41 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub035:2421444:2421444 [3] NCCL INFO cudaDriverVersion 12010
+gpub035:2421444:2421444 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.135<0>
+gpub035:2421444:2421444 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub035:2421444:2421508 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.135<0>
+gpub035:2421444:2421508 [3] NCCL INFO Using network IB
+gpub035:2421444:2421508 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub035:2421444:2421508 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpub035:2421444:2421508 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub035:2421444:2421508 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub035:2421444:2421508 [3] NCCL INFO Connected all rings
+gpub035:2421444:2421508 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub035:2421444:2421508 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub035:2421444:2421508 [3] NCCL INFO Connected all trees
+gpub035:2421444:2421508 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub035:2421444:2421508 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub035:2421444:2421508 [3] NCCL INFO comm 0x93391d0 rank 43 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub035:2421443:2421443 [2] NCCL INFO cudaDriverVersion 12010
+gpub035:2421443:2421443 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.135<0>
+gpub035:2421443:2421443 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub035:2421443:2421507 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.135<0>
+gpub035:2421443:2421507 [2] NCCL INFO Using network IB
+gpub035:2421443:2421507 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub035:2421443:2421507 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41
+gpub035:2421443:2421507 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub035:2421443:2421507 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub035:2421443:2421507 [2] NCCL INFO Connected all rings
+gpub035:2421443:2421507 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub035:2421443:2421507 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub035:2421443:2421507 [2] NCCL INFO Connected all trees
+gpub035:2421443:2421507 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub035:2421443:2421507 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub035:2421443:2421507 [2] NCCL INFO comm 0x91022d0 rank 42 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub016:1262187:1262187 [0] NCCL INFO cudaDriverVersion 12010
+gpub016:1262187:1262187 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0>
+gpub016:1262187:1262187 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub016:1262187:1262261 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0>
+gpub016:1262187:1262261 [0] NCCL INFO Using network IB
+gpub016:1262187:1262261 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub016:1262187:1262261 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpub016:1262187:1262261 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub016:1262187:1262261 [0] NCCL INFO Connected all rings
+gpub016:1262187:1262261 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Connected all trees
+gpub016:1262187:1262261 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub016:1262187:1262261 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub016:1262187:1262261 [0] NCCL INFO comm 0x2218f980 rank 28 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub016:1262190:1262190 [3] NCCL INFO cudaDriverVersion 12010
+gpub016:1262190:1262190 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0>
+gpub016:1262190:1262190 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub016:1262190:1262260 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0>
+gpub016:1262190:1262260 [3] NCCL INFO Using network IB
+gpub016:1262190:1262260 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub016:1262190:1262260 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30
+gpub016:1262190:1262260 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpub016:1262190:1262260 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpub016:1262190:1262260 [3] NCCL INFO Connected all rings
+gpub016:1262190:1262260 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC
+gpub016:1262190:1262260 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC
+gpub016:1262190:1262260 [3] NCCL INFO Connected all trees
+gpub016:1262190:1262260 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub016:1262190:1262260 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub016:1262190:1262260 [3] NCCL INFO comm 0xa7c96f60 rank 31 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub031:1764393:1764393 [0] NCCL INFO cudaDriverVersion 12010
+gpub031:1764393:1764393 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0>
+gpub031:1764393:1764393 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub031:1764393:1764461 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0>
+gpub031:1764393:1764461 [0] NCCL INFO Using network IB
+gpub031:1764393:1764461 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub031:1764393:1764461 [0] NCCL INFO Trees [0] 33/48/-1->32->65 [1] 33/-1/-1->32->36
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub031:1764393:1764461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub031:1764393:1764461 [0] NCCL INFO Connected all rings
+gpub031:1764393:1764461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 65[46000] [send] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 65[46000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Connected all trees
+gpub031:1764393:1764461 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub031:1764393:1764461 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub031:1764393:1764461 [0] NCCL INFO comm 0xb6eeceb0 rank 32 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub016:1262189:1262189 [2] NCCL INFO cudaDriverVersion 12010
+gpub016:1262189:1262189 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0>
+gpub016:1262189:1262189 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub016:1262189:1262263 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0>
+gpub016:1262189:1262263 [2] NCCL INFO Using network IB
+gpub016:1262189:1262263 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub016:1262189:1262263 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpub016:1262189:1262263 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub016:1262189:1262263 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub016:1262189:1262263 [2] NCCL INFO Connected all rings
+gpub016:1262189:1262263 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub016:1262189:1262263 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub016:1262189:1262263 [2] NCCL INFO Connected all trees
+gpub016:1262189:1262263 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub016:1262189:1262263 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub016:1262189:1262263 [2] NCCL INFO comm 0x503afdd0 rank 30 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub011:1300883:1300883 [3] NCCL INFO cudaDriverVersion 12010
+gpub011:1300883:1300883 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0>
+gpub011:1300883:1300883 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub011:1300883:1300950 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0>
+gpub011:1300883:1300950 [3] NCCL INFO Using network IB
+gpub011:1300883:1300950 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub011:1300883:1300950 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpub011:1300883:1300950 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub011:1300883:1300950 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub011:1300883:1300950 [3] NCCL INFO Connected all rings
+gpub011:1300883:1300950 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub011:1300883:1300950 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub031:1764395:1764395 [2] NCCL INFO cudaDriverVersion 12010
+gpub031:1764395:1764395 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0>
+gpub031:1764395:1764395 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub031:1764395:1764462 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0>
+gpub031:1764395:1764462 [2] NCCL INFO Using network IB
+gpub031:1764395:1764462 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub031:1764395:1764462 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub031:1764395:1764462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub031:1764395:1764462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub031:1764395:1764462 [2] NCCL INFO Connected all rings +gpub031:1764395:1764462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub031:1764395:1764462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub011:1300883:1300950 [3] NCCL INFO Connected all trees +gpub011:1300883:1300950 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub011:1300883:1300950 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1300883:1300950 [3] NCCL INFO comm 0xb778d540 rank 11 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1764395:1764462 [2] NCCL INFO Connected all trees +gpub031:1764395:1764462 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub031:1764395:1764462 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1764395:1764462 [2] NCCL INFO comm 0x8d6e3c20 rank 34 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub011:1300880:1300880 [0] NCCL INFO cudaDriverVersion 12010 +gpub011:1300880:1300880 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1300880:1300880 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1300880:1300948 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1300880:1300948 [0] NCCL INFO Using network IB +gpub011:1300880:1300948 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub011:1300880:1300948 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub011:1300880:1300948 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub011:1300880:1300948 [0] NCCL INFO Connected all rings +gpub011:1300880:1300948 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Connected all trees +gpub011:1300880:1300948 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub011:1300880:1300948 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1300880:1300948 [0] NCCL INFO comm 0x50df64a0 rank 8 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:1764394:1764394 [1] NCCL INFO cudaDriverVersion 12010 +gpub031:1764394:1764394 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1764394:1764394 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1764394:1764464 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE 
[RO]; OOB eth1:172.28.23.131<0> +gpub031:1764394:1764464 [1] NCCL INFO Using network IB +gpub031:1764394:1764464 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub031:1764394:1764464 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub031:1764394:1764464 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub031:1764394:1764464 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub031:1764394:1764464 [1] NCCL INFO Connected all rings +gpub031:1764394:1764464 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub031:1764394:1764464 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub031:1764394:1764464 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub031:1764394:1764464 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub031:1764394:1764464 [1] NCCL INFO Connected all trees +gpub031:1764394:1764464 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub031:1764394:1764464 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1764394:1764464 [1] NCCL INFO comm 0x514068e0 rank 33 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub011:1300881:1300881 [1] NCCL INFO cudaDriverVersion 12010 +gpub011:1300881:1300881 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1300881:1300881 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1300881:1300947 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1300881:1300947 [1] NCCL INFO Using network IB +gpub011:1300881:1300947 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub011:1300881:1300947 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub011:1300881:1300947 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub011:1300881:1300947 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub011:1300881:1300947 [1] NCCL INFO Connected all rings +gpub011:1300881:1300947 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub011:1300881:1300947 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub011:1300881:1300947 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub011:1300881:1300947 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub011:1300881:1300947 [1] NCCL INFO Connected all trees +gpub011:1300881:1300947 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub011:1300881:1300947 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1300881:1300947 [1] NCCL INFO comm 0xb0451f0 rank 9 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub031:1764396:1764396 [3] NCCL INFO cudaDriverVersion 12010 +gpub031:1764396:1764396 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1764396:1764396 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1764396:1764463 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1764396:1764463 [3] NCCL INFO Using network IB +gpub031:1764396:1764463 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub031:1764396:1764463 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub031:1764396:1764463 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub031:1764396:1764463 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub031:1764396:1764463 [3] 
NCCL INFO Connected all rings +gpub031:1764396:1764463 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub031:1764396:1764463 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub031:1764396:1764463 [3] NCCL INFO Connected all trees +gpub031:1764396:1764463 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub031:1764396:1764463 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1764396:1764463 [3] NCCL INFO comm 0x9dc35740 rank 35 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub016:1262188:1262188 [1] NCCL INFO cudaDriverVersion 12010 +gpub016:1262188:1262188 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:1262188:1262188 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:1262188:1262262 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0> +gpub016:1262188:1262262 [1] NCCL INFO Using network IB +gpub016:1262188:1262262 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub016:1262188:1262262 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub016:1262188:1262262 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub016:1262188:1262262 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub016:1262188:1262262 [1] NCCL INFO Connected all rings +gpub016:1262188:1262262 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpub016:1262188:1262262 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpub016:1262188:1262262 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub016:1262188:1262262 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub016:1262188:1262262 [1] NCCL INFO Connected all trees +gpub016:1262188:1262262 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub016:1262188:1262262 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:1262188:1262262 [1] NCCL INFO comm 0x8bac38c0 rank 29 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub011:1300882:1300882 [2] NCCL INFO cudaDriverVersion 12010 +gpub011:1300882:1300882 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1300882:1300882 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1300882:1300949 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1300882:1300949 [2] NCCL INFO Using network IB +gpub011:1300882:1300949 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub011:1300882:1300949 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub011:1300882:1300949 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub011:1300882:1300949 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub011:1300882:1300949 [2] NCCL INFO Connected all rings +gpub011:1300882:1300949 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub011:1300882:1300949 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub011:1300882:1300949 [2] NCCL INFO Connected all trees +gpub011:1300882:1300949 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub011:1300882:1300949 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1300882:1300949 [2] NCCL INFO comm 0x507d2a60 rank 10 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub040:1881001:1881001 [2] NCCL INFO cudaDriverVersion 12010 +gpub040:1881001:1881001 [2] NCCL 
INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:1881001:1881001 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:1881001:1881063 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:1881001:1881063 [2] NCCL INFO Using network IB +gpub040:1881001:1881063 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub040:1881001:1881063 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub040:1881001:1881063 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub040:1881001:1881063 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub040:1881001:1881063 [2] NCCL INFO Connected all rings +gpub040:1881001:1881063 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub040:1881001:1881063 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub040:1881001:1881063 [2] NCCL INFO Connected all trees +gpub040:1881001:1881063 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub040:1881001:1881063 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:1881001:1881063 [2] NCCL INFO comm 0x930e4a0 rank 58 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub066:1330380:1330380 [2] NCCL INFO cudaDriverVersion 12010 +gpub066:1330380:1330380 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1330380:1330380 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1330380:1330441 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1330380:1330441 [2] NCCL INFO Using network IB +gpub066:1330380:1330441 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub066:1330380:1330441 [2] NCCL INFO Trees [0] 87/-1/-1->86->85 [1] 87/-1/-1->86->85 +gpub066:1330380:1330441 [2] NCCL INFO Channel 00/0 : 86[85000] -> 87[c7000] via P2P/IPC +gpub066:1330380:1330441 [2] NCCL INFO Channel 01/0 : 86[85000] -> 87[c7000] via P2P/IPC +gpub066:1330380:1330441 [2] NCCL INFO Connected all rings +gpub066:1330380:1330441 [2] NCCL INFO Channel 00/0 : 86[85000] -> 85[46000] via P2P/IPC +gpub066:1330380:1330441 [2] NCCL INFO Channel 01/0 : 86[85000] -> 85[46000] via P2P/IPC +gpub066:1330380:1330441 [2] NCCL INFO Connected all trees +gpub066:1330380:1330441 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub066:1330380:1330441 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1330380:1330441 [2] NCCL INFO comm 0xb68349c0 rank 86 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub040:1881002:1881002 [3] NCCL INFO cudaDriverVersion 12010 +gpub040:1881002:1881002 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:1881002:1881002 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:1881002:1881064 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:1881002:1881064 [3] NCCL INFO Using network IB +gpub040:1881002:1881064 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub040:1881002:1881064 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub040:1881002:1881064 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub040:1881002:1881064 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub040:1881002:1881064 [3] NCCL INFO Connected all rings +gpub040:1881002:1881064 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub040:1881002:1881064 [3] 
NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub040:1881002:1881064 [3] NCCL INFO Connected all trees +gpub040:1881002:1881064 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub040:1881002:1881064 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:1881002:1881064 [3] NCCL INFO comm 0x8c834280 rank 59 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub040:1880999:1880999 [0] NCCL INFO cudaDriverVersion 12010 +gpub040:1880999:1880999 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:1880999:1880999 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:1880999:1881065 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:1880999:1881065 [0] NCCL INFO Using network IB +gpub040:1880999:1881065 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub040:1880999:1881065 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub040:1880999:1881065 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub040:1880999:1881065 [0] NCCL INFO Connected all rings +gpub040:1880999:1881065 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Connected all trees +gpub040:1880999:1881065 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub040:1880999:1881065 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:1880999:1881065 [0] NCCL INFO comm 0x8da18860 rank 56 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub040:1881000:1881000 [1] NCCL INFO cudaDriverVersion 12010 +gpub040:1881000:1881000 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:1881000:1881000 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:1881000:1881066 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:1881000:1881066 [1] NCCL INFO Using network IB +gpub040:1881000:1881066 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub040:1881000:1881066 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub040:1881000:1881066 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub040:1881000:1881066 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub040:1881000:1881066 [1] NCCL INFO Connected all rings +gpub040:1881000:1881066 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub040:1881000:1881066 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub040:1881000:1881066 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub040:1881000:1881066 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] 
via P2P/IPC +gpub040:1881000:1881066 [1] NCCL INFO Connected all trees +gpub040:1881000:1881066 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub040:1881000:1881066 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:1881000:1881066 [1] NCCL INFO comm 0x9d50600 rank 57 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub066:1330381:1330381 [3] NCCL INFO cudaDriverVersion 12010 +gpub066:1330381:1330381 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1330381:1330381 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1330381:1330442 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1330381:1330442 [3] NCCL INFO Using network IB +gpub066:1330381:1330442 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub066:1330381:1330442 [3] NCCL INFO Trees [0] -1/-1/-1->87->86 [1] -1/-1/-1->87->86 +gpub066:1330381:1330442 [3] NCCL INFO Channel 00/0 : 87[c7000] -> 88[7000] [send] via NET/IB/0 +gpub066:1330381:1330442 [3] NCCL INFO Channel 01/0 : 87[c7000] -> 88[7000] [send] via NET/IB/0 +gpub066:1330381:1330442 [3] NCCL INFO Connected all rings +gpub066:1330381:1330442 [3] NCCL INFO Channel 00/0 : 87[c7000] -> 86[85000] via P2P/IPC +gpub066:1330381:1330442 [3] NCCL INFO Channel 01/0 : 87[c7000] -> 86[85000] via P2P/IPC +gpub066:1330381:1330442 [3] NCCL INFO Connected all trees +gpub066:1330381:1330442 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub066:1330381:1330442 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1330381:1330442 [3] NCCL INFO comm 0x50ca2de0 rank 87 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1330378:1330378 [0] NCCL INFO cudaDriverVersion 12010 +gpub066:1330378:1330378 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1330378:1330378 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1330378:1330440 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1330378:1330440 [0] NCCL INFO Using network IB +gpub066:1330378:1330440 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub066:1330378:1330440 [0] NCCL INFO Trees [0] 85/-1/-1->84->89 [1] 85/80/-1->84->77 +gpub066:1330378:1330440 [0] NCCL INFO Channel 00/0 : 83[c7000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 83[c7000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 00/0 : 84[7000] -> 85[46000] via P2P/IPC +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 84[7000] -> 85[46000] via P2P/IPC +gpub066:1330378:1330440 [0] NCCL INFO Connected all rings +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 80[7000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 00/0 : 84[7000] -> 89[46000] [send] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 77[46000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 84[7000] -> 77[46000] [send] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 00/0 : 89[46000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 84[7000] -> 80[7000] [send] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Connected all trees +gpub066:1330378:1330440 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub066:1330378:1330440 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 
+gpub066:1330379:1330379 [1] NCCL INFO cudaDriverVersion 12010
+gpub066:1330379:1330379 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0>
+gpub066:1330379:1330379 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub066:1330379:1330439 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0>
+gpub066:1330379:1330439 [1] NCCL INFO Using network IB
+gpub066:1330379:1330439 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub066:1330379:1330439 [1] NCCL INFO Trees [0] 86/-1/-1->85->84 [1] 86/88/-1->85->84
+gpub066:1330379:1330439 [1] NCCL INFO Channel 00/0 : 85[46000] -> 86[85000] via P2P/IPC
+gpub066:1330379:1330439 [1] NCCL INFO Channel 01/0 : 85[46000] -> 86[85000] via P2P/IPC
+gpub066:1330379:1330439 [1] NCCL INFO Connected all rings
+gpub066:1330379:1330439 [1] NCCL INFO Channel 01/0 : 85[46000] -> 88[7000] [send] via NET/IB/0
+gpub066:1330379:1330439 [1] NCCL INFO Channel 01/0 : 88[7000] -> 85[46000] [receive] via NET/IB/0
+gpub066:1330379:1330439 [1] NCCL INFO Channel 00/0 : 85[46000] -> 84[7000] via P2P/IPC
+gpub066:1330379:1330439 [1] NCCL INFO Channel 01/0 : 85[46000] -> 84[7000] via P2P/IPC
+gpub066:1330379:1330439 [1] NCCL INFO Connected all trees
+gpub066:1330379:1330439 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub066:1330379:1330439 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub066:1330379:1330439 [1] NCCL INFO comm 0xa42e450 rank 85 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
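The reducer warning above (emitted repeatedly here, by the job's many distributed ranks) is PyTorch pointing out that find_unused_parameters=True pays for an extra traversal of the autograd graph on every iteration while finding nothing unused. If every parameter provably participates in each forward pass, the flag can be dropped where the model is wrapped. A minimal sketch of the relevant PyTorch call, assuming a standard DDP setup (the surrounding trainer wiring is not shown in this log):

    import torch
    from torch.nn.parallel import DistributedDataParallel as DDP

    def wrap_model(model: torch.nn.Module, local_rank: int) -> DDP:
        # Requires torch.distributed to be initialized already.
        # find_unused_parameters=False (the default) skips the extra
        # graph traversal the warning complains about; keep it True
        # only if some iterations genuinely leave parameters unused.
        return DDP(
            model.cuda(local_rank),
            device_ids=[local_rank],
            find_unused_parameters=False,
        )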
+[gpub001:0/128] 2023-07-02 01:48:07,181 (trainer:732) INFO: 1epoch:train:1-100batch: iter_time=1.540, forward_time=0.272, loss_ctc=540.181, loss_att=397.271, acc=0.027, loss=440.144, backward_time=1.098, grad_norm=584.638, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.122, optim0_lr0=1.288e-06, train_time=4.972 +[gpub001:0/128] 2023-07-02 01:50:35,497 (trainer:732) INFO: 1epoch:train:101-200batch: iter_time=1.152e-04, forward_time=0.141, loss_ctc=462.426, loss_att=336.277, acc=0.029, loss=374.121, backward_time=1.077, grad_norm=387.500, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=3.788e-06, train_time=1.483 +[gpub001:0/128] 2023-07-02 01:53:03,465 (trainer:732) INFO: 1epoch:train:201-300batch: iter_time=1.085e-04, forward_time=0.142, loss_ctc=461.451, loss_att=337.298, acc=0.045, loss=374.544, backward_time=1.078, grad_norm=417.405, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=6.287e-06, train_time=1.479 +[gpub001:0/128] 2023-07-02 01:55:31,849 (trainer:732) INFO: 1epoch:train:301-400batch: iter_time=1.042e-04, forward_time=0.143, loss_ctc=342.808, loss_att=285.551, acc=0.064, loss=302.728, backward_time=1.080, grad_norm=643.089, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=8.788e-06, train_time=1.484 +[gpub001:0/128] 2023-07-02 01:55:42,501 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
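Across these records the reported loss tracks a fixed interpolation of the two objectives, loss = 0.3 * loss_ctc + 0.7 * loss_att (for the first record above, 0.3 * 540.181 + 0.7 * 397.271 = 440.144, exactly the logged loss). The 0.3/0.7 split is inferred from the logged numbers, not read from the config; a sketch of that combination:

    import torch

    def hybrid_loss(loss_ctc: torch.Tensor, loss_att: torch.Tensor,
                    ctc_weight: float = 0.3) -> torch.Tensor:
        # CTC/attention interpolation consistent with the logged values;
        # the exact weight lives in the training config (assumed here).
        return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att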
+[gpub001:0/128] 2023-07-02 01:56:04,392 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 01:56:08,967 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 01:56:08,967 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 01:56:08,971 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 02:02:36,435 (trainer:732) INFO: 1epoch:train:401-500batch: iter_time=1.606, forward_time=0.145, loss_ctc=338.668, loss_att=312.612, acc=0.065, loss=320.429, backward_time=1.102, grad_norm=722.907, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=1.129e-05, train_time=4.246 +[gpub001:0/128] 2023-07-02 02:05:05,101 (trainer:732) INFO: 1epoch:train:501-600batch: iter_time=9.538e-05, forward_time=0.144, loss_ctc=290.690, loss_att=280.674, acc=0.090, loss=283.679, backward_time=1.078, grad_norm=573.866, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=1.379e-05, train_time=1.486 +[gpub001:0/128] 2023-07-02 02:07:33,618 (trainer:732) INFO: 1epoch:train:601-700batch: iter_time=9.612e-05, forward_time=0.143, loss_ctc=302.811, loss_att=317.380, acc=0.118, loss=313.009, backward_time=1.077, grad_norm=525.059, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=1.629e-05, train_time=1.485 +[gpub001:0/128] 2023-07-02 02:10:02,081 (trainer:732) INFO: 1epoch:train:701-800batch: iter_time=9.252e-05, forward_time=0.143, loss_ctc=263.886, loss_att=258.073, acc=0.153, loss=259.817, backward_time=1.077, grad_norm=413.463, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=1.879e-05, train_time=1.484 +[gpub001:0/128] 2023-07-02 02:10:03,693 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
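Each "Building Nth iter-factory" line marks the trainer moving on to another of the ten pre-split shards (split.0-split.9), constructing a fresh dataset and batch sampler for it; that construction cost is why iter_time spikes to roughly 1.5-2.5 s on the first batch after every build. A rough sketch of the shard-cycling idea only; the shard ordering and builder are illustrative, not ESPnet's actual multiple_iter_factory internals:

    from typing import Callable, Iterable, Iterator, Sequence

    def cycle_shards(build_iter: Callable[[str], Iterable],
                     shards: Sequence[str]) -> Iterator:
        # One iterator per shard, built lazily in the given order;
        # batches stream out of each shard before the next is built.
        for shard in shards:
            yield from build_iter(shard)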
+[gpub001:0/128] 2023-07-02 02:10:25,856 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 02:10:30,152 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 02:10:30,152 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 02:10:30,156 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 02:18:38,072 (trainer:732) INFO: 1epoch:train:801-900batch: iter_time=1.499, forward_time=0.145, loss_ctc=297.783, loss_att=277.425, acc=0.155, loss=283.533, backward_time=1.202, grad_norm=432.564, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=2.129e-05, train_time=5.160 +[gpub001:0/128] 2023-07-02 02:21:09,704 (trainer:732) INFO: 1epoch:train:901-1000batch: iter_time=9.771e-05, forward_time=0.144, loss_ctc=274.105, loss_att=250.004, acc=0.168, loss=257.234, backward_time=1.078, grad_norm=408.850, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=2.379e-05, train_time=1.516 +[gpub001:0/128] 2023-07-02 02:23:38,330 (trainer:732) INFO: 1epoch:train:1001-1100batch: iter_time=1.074e-04, forward_time=0.143, loss_ctc=293.450, loss_att=288.752, acc=0.156, loss=290.161, backward_time=1.078, grad_norm=433.379, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=2.629e-05, train_time=1.486 +[gpub001:0/128] 2023-07-02 02:26:07,352 (trainer:732) INFO: 1epoch:train:1101-1200batch: iter_time=9.141e-05, forward_time=0.144, loss_ctc=254.103, loss_att=237.304, acc=0.175, loss=242.344, backward_time=1.079, grad_norm=378.541, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=2.879e-05, train_time=1.490 +[gpub001:0/128] 2023-07-02 02:26:09,180 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 02:26:31,466 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 02:26:35,764 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 02:26:35,764 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 02:26:35,768 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 02:33:43,766 (trainer:732) INFO: 1epoch:train:1201-1300batch: iter_time=1.566, forward_time=0.182, loss_ctc=284.394, loss_att=260.556, acc=0.170, loss=267.707, backward_time=1.096, grad_norm=319.756, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.123, optim0_lr0=3.129e-05, train_time=4.564 +[gpub001:0/128] 2023-07-02 02:36:17,719 (trainer:732) INFO: 1epoch:train:1301-1400batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=264.134, loss_att=232.279, acc=0.185, loss=241.835, backward_time=1.082, grad_norm=322.456, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=3.379e-05, train_time=1.539 +[gpub001:0/128] 2023-07-02 02:38:56,271 (trainer:732) INFO: 1epoch:train:1401-1500batch: iter_time=9.806e-05, forward_time=0.145, loss_ctc=283.768, loss_att=254.155, acc=0.174, loss=263.039, backward_time=1.085, grad_norm=323.685, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=3.629e-05, train_time=1.585 +[gpub001:0/128] 2023-07-02 02:41:27,198 (trainer:732) INFO: 1epoch:train:1501-1600batch: iter_time=9.429e-05, forward_time=0.145, loss_ctc=248.736, loss_att=223.280, acc=0.187, loss=230.917, backward_time=1.079, grad_norm=295.086, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=3.879e-05, train_time=1.509 +[gpub001:0/128] 2023-07-02 02:41:41,817 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 02:42:03,909 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 02:42:08,170 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 02:42:08,170 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 02:42:08,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 02:47:44,894 (trainer:732) INFO: 1epoch:train:1601-1700batch: iter_time=2.210, forward_time=0.168, loss_ctc=277.204, loss_att=247.225, acc=0.188, loss=256.219, backward_time=1.098, grad_norm=299.571, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=4.129e-05, train_time=3.776 +[gpub001:0/128] 2023-07-02 02:50:21,446 (trainer:732) INFO: 1epoch:train:1701-1800batch: iter_time=8.222e-05, forward_time=0.144, loss_ctc=256.731, loss_att=222.419, acc=0.198, loss=232.713, backward_time=1.097, grad_norm=275.648, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=4.379e-05, train_time=1.566 +[gpub001:0/128] 2023-07-02 02:53:02,939 (trainer:732) INFO: 1epoch:train:1801-1900batch: iter_time=8.187e-05, forward_time=0.144, loss_ctc=274.581, loss_att=254.361, acc=0.182, loss=260.427, backward_time=1.114, grad_norm=313.056, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=4.629e-05, train_time=1.615 +[gpub001:0/128] 2023-07-02 02:55:42,853 (trainer:732) INFO: 1epoch:train:1901-2000batch: iter_time=8.359e-05, forward_time=0.143, loss_ctc=236.100, loss_att=214.394, acc=0.200, loss=220.906, backward_time=1.087, grad_norm=255.461, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=4.879e-05, train_time=1.599 +[gpub001:0/128] 2023-07-02 02:55:53,991 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
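optim0_lr0 in these records climbs by almost exactly 2.5e-06 every 100 batches, i.e. a linear warmup of 2.5e-08 per step; extrapolated, that slope reaches 2.5e-04 at step 10,000. A minimal sketch of such a linear warmup (peak and warmup length are read off that slope, and whatever decay follows the warmup is not visible this early in the log):

    def warmup_lr(step: int, peak_lr: float = 2.5e-4,
                  warmup_steps: int = 10_000) -> float:
        # Linear ramp matching the logged optim0_lr0 increments
        # (2.5e-08 per step). Post-warmup behaviour is a placeholder.
        if step < warmup_steps:
            return peak_lr * step / warmup_steps
        return peak_lr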
+[gpub001:0/128] 2023-07-02 02:56:16,340 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 02:56:20,644 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 02:56:20,644 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 02:56:20,648 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 03:02:12,674 (trainer:732) INFO: 1epoch:train:2001-2100batch: iter_time=1.648, forward_time=0.145, loss_ctc=255.392, loss_att=234.231, acc=0.196, loss=240.580, backward_time=1.102, grad_norm=254.801, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=5.129e-05, train_time=3.898 +[gpub001:0/128] 2023-07-02 03:04:41,291 (trainer:732) INFO: 1epoch:train:2101-2200batch: iter_time=8.832e-05, forward_time=0.145, loss_ctc=228.533, loss_att=210.197, acc=0.210, loss=215.698, backward_time=1.080, grad_norm=265.408, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=5.379e-05, train_time=1.486 +[gpub001:0/128] 2023-07-02 03:07:09,624 (trainer:732) INFO: 1epoch:train:2201-2300batch: iter_time=9.138e-05, forward_time=0.144, loss_ctc=231.701, loss_att=230.702, acc=0.198, loss=231.002, backward_time=1.079, grad_norm=283.955, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=5.629e-05, train_time=1.483 +[gpub001:0/128] 2023-07-02 03:09:49,400 (trainer:732) INFO: 1epoch:train:2301-2400batch: iter_time=8.965e-05, forward_time=0.144, loss_ctc=196.101, loss_att=202.117, acc=0.215, loss=200.312, backward_time=1.099, grad_norm=248.405, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=5.879e-05, train_time=1.598 +[gpub001:0/128] 2023-07-02 03:09:51,010 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
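loss_scale sits at 6.554e+04 (2^16) for the first ~2000 batches and doubles to 1.311e+05 at batch 2001-2100 (it doubles again at roughly the same cadence further down: 2.621e+05, 5.243e+05, 1.049e+06). That matches the default dynamic loss scaling of PyTorch AMP's GradScaler: start at 65536, double after every 2000 overflow-free steps, halve on overflow. A sketch, assuming the stock scaler is in use:

    import torch

    scaler = torch.cuda.amp.GradScaler(
        init_scale=65536.0,    # 6.554e+04, the first logged loss_scale
        growth_factor=2.0,     # doubling seen at batches ~2000, ~4000, ...
        backoff_factor=0.5,    # halve whenever a scaled step overflows
        growth_interval=2000,  # matches the 2000-step cadence above
    )
    # Per step (loss/optimizer come from the surrounding trainer):
    #   scaler.scale(loss).backward()
    #   scaler.step(optimizer)
    #   scaler.update()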
+[gpub001:0/128] 2023-07-02 03:10:13,711 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 03:10:18,017 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 03:10:18,018 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 03:10:18,021 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 03:18:03,936 (trainer:732) INFO: 1epoch:train:2401-2500batch: iter_time=1.551, forward_time=0.146, loss_ctc=209.517, loss_att=221.916, acc=0.228, loss=218.197, backward_time=1.101, grad_norm=277.157, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=6.129e-05, train_time=4.945 +[gpub001:0/128] 2023-07-02 03:20:32,902 (trainer:732) INFO: 1epoch:train:2501-2600batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=197.334, loss_att=192.816, acc=0.252, loss=194.171, backward_time=1.080, grad_norm=292.260, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=6.379e-05, train_time=1.489 +[gpub001:0/128] 2023-07-02 03:23:26,427 (trainer:732) INFO: 1epoch:train:2601-2700batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=194.768, loss_att=218.914, acc=0.251, loss=211.670, backward_time=1.099, grad_norm=328.740, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=6.629e-05, train_time=1.735 +[gpub001:0/128] 2023-07-02 03:25:58,159 (trainer:732) INFO: 1epoch:train:2701-2800batch: iter_time=9.939e-05, forward_time=0.146, loss_ctc=167.623, loss_att=168.951, acc=0.295, loss=168.553, backward_time=1.081, grad_norm=241.927, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=6.879e-05, train_time=1.517 +[gpub001:0/128] 2023-07-02 03:26:07,547 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 03:26:29,813 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 03:26:34,386 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 03:26:34,386 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 03:26:34,390 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 03:31:56,946 (trainer:732) INFO: 1epoch:train:2801-2900batch: iter_time=1.619, forward_time=0.172, loss_ctc=176.463, loss_att=177.295, acc=0.315, loss=177.046, backward_time=1.103, grad_norm=201.912, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.122, optim0_lr0=7.129e-05, train_time=3.588 +[gpub001:0/128] 2023-07-02 03:34:26,090 (trainer:732) INFO: 1epoch:train:2901-3000batch: iter_time=8.798e-05, forward_time=0.144, loss_ctc=169.381, loss_att=157.379, acc=0.328, loss=160.979, backward_time=1.077, grad_norm=221.600, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=7.379e-05, train_time=1.491 +[gpub001:0/128] 2023-07-02 03:36:55,616 (trainer:732) INFO: 1epoch:train:3001-3100batch: iter_time=9.043e-05, forward_time=0.146, loss_ctc=164.817, loss_att=190.673, acc=0.310, loss=182.916, backward_time=1.082, grad_norm=242.341, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=7.629e-05, train_time=1.495 +[gpub001:0/128] 2023-07-02 03:39:26,502 (trainer:732) INFO: 1epoch:train:3101-3200batch: iter_time=9.068e-05, forward_time=0.144, loss_ctc=152.565, loss_att=148.213, acc=0.340, loss=149.518, backward_time=1.083, grad_norm=231.027, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=7.879e-05, train_time=1.509 +[gpub001:0/128] 2023-07-02 03:39:30,891 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 03:39:53,618 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 03:39:57,929 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 03:39:57,929 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 03:39:57,932 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 03:47:03,652 (trainer:732) INFO: 1epoch:train:3201-3300batch: iter_time=2.099, forward_time=0.144, loss_ctc=155.601, loss_att=164.815, acc=0.345, loss=162.051, backward_time=1.111, grad_norm=210.374, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=8.129e-05, train_time=4.571 +[gpub001:0/128] 2023-07-02 03:49:32,184 (trainer:732) INFO: 1epoch:train:3301-3400batch: iter_time=1.183e-04, forward_time=0.144, loss_ctc=152.491, loss_att=143.312, acc=0.360, loss=146.066, backward_time=1.076, grad_norm=205.020, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=8.379e-05, train_time=1.486 +[gpub001:0/128] 2023-07-02 03:52:01,611 (trainer:732) INFO: 1epoch:train:3401-3500batch: iter_time=1.060e-04, forward_time=0.145, loss_ctc=148.864, loss_att=168.528, acc=0.335, loss=162.629, backward_time=1.078, grad_norm=210.197, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=8.629e-05, train_time=1.494 +[gpub001:0/128] 2023-07-02 03:54:31,233 (trainer:732) INFO: 1epoch:train:3501-3600batch: iter_time=9.133e-05, forward_time=0.145, loss_ctc=135.953, loss_att=134.611, acc=0.371, loss=135.014, backward_time=1.079, grad_norm=179.182, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=8.879e-05, train_time=1.496 +[gpub001:0/128] 2023-07-02 03:54:40,963 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 03:55:03,510 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 03:55:07,768 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 03:55:07,768 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 03:55:07,772 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 04:02:55,539 (trainer:732) INFO: 1epoch:train:3601-3700batch: iter_time=1.634, forward_time=0.172, loss_ctc=141.664, loss_att=149.355, acc=0.379, loss=147.048, backward_time=1.096, grad_norm=213.922, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=9.129e-05, train_time=5.043 +[gpub001:0/128] 2023-07-02 04:05:24,077 (trainer:732) INFO: 1epoch:train:3701-3800batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=140.225, loss_att=130.232, acc=0.390, loss=133.230, backward_time=1.077, grad_norm=217.100, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=9.379e-05, train_time=1.485 +[gpub001:0/128] 2023-07-02 04:07:53,148 (trainer:732) INFO: 1epoch:train:3801-3900batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=137.165, loss_att=157.511, acc=0.355, loss=151.407, backward_time=1.077, grad_norm=196.286, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=9.629e-05, train_time=1.490 +[gpub001:0/128] 2023-07-02 04:10:39,053 (trainer:732) INFO: 1epoch:train:3901-4000batch: iter_time=9.469e-05, forward_time=0.144, loss_ctc=124.160, loss_att=126.918, acc=0.390, loss=126.091, backward_time=1.090, grad_norm=241.832, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=9.879e-05, train_time=1.659 +[gpub001:0/128] 2023-07-02 04:20:41,655 (trainer:338) INFO: 1epoch results: [train] iter_time=0.424, forward_time=0.150, loss_ctc=243.208, loss_att=225.399, acc=0.220, loss=230.742, backward_time=1.090, grad_norm=327.236, clip=100.000, loss_scale=9.830e+04, optim_step_time=0.121, optim0_lr0=5.004e-05, train_time=2.262, time=2 hours, 31 minutes and 2.4 seconds, total_count=4000, gpu_max_cached_mem_GB=33.912, [valid] loss_ctc=122.230, cer_ctc=0.606, loss_att=129.277, acc=0.275, cer=0.670, wer=1.000, loss=127.163, time=3 minutes and 57.44 seconds, total_count=506, gpu_max_cached_mem_GB=37.207, [att_plot] time=5 minutes and 51.9 seconds, total_count=0, gpu_max_cached_mem_GB=37.207 +[gpub001:0/128] 2023-07-02 04:20:57,442 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 04:20:57,442 (trainer:272) INFO: 2/100epoch started. Estimated time to finish: 1 week, 4 days and 1 hour +[gpub001:0/128] 2023-07-02 04:20:57,445 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
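The finish estimate is plain extrapolation: epoch 1 cost about 2 h 31 m of training plus ~4 m of validation and ~6 m of attention plots, roughly 2.7 h in all, and 99 remaining epochs at that pace is ~265 h, i.e. the printed "1 week, 4 days and 1 hour". A sketch of that arithmetic (function name illustrative):

    import datetime

    def estimated_time_to_finish(epoch_seconds: float, epochs_done: int,
                                 max_epoch: int = 100) -> datetime.timedelta:
        # Remaining wall-clock = average epoch duration * epochs left.
        return datetime.timedelta(
            seconds=epoch_seconds * (max_epoch - epochs_done))

    print(estimated_time_to_finish(2 * 3600 + 41 * 60, epochs_done=1))
    # -> 11 days, 1:39:00  (about "1 week, 4 days and 1 hour")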
+[gpub001:0/128] 2023-07-02 04:21:19,393 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 04:21:23,457 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 04:21:23,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 04:21:23,461 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 04:26:11,570 (trainer:732) INFO: 2epoch:train:1-100batch: iter_time=1.523, forward_time=0.165, loss_ctc=141.462, loss_att=147.694, acc=0.365, loss=145.825, backward_time=1.111, grad_norm=180.767, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.122, optim0_lr0=1.013e-04, train_time=3.141 +[gpub001:0/128] 2023-07-02 04:28:54,092 (trainer:732) INFO: 2epoch:train:101-200batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=127.915, loss_att=134.951, acc=0.409, loss=132.840, backward_time=1.103, grad_norm=154.665, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.038e-04, train_time=1.625 +[gpub001:0/128] 2023-07-02 04:31:31,556 (trainer:732) INFO: 2epoch:train:201-300batch: iter_time=9.197e-05, forward_time=0.200, loss_ctc=125.885, loss_att=122.785, acc=0.379, loss=123.715, backward_time=1.092, grad_norm=178.909, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.124, optim0_lr0=1.063e-04, train_time=1.574 +[gpub001:0/128] 2023-07-02 04:34:11,293 (trainer:732) INFO: 2epoch:train:301-400batch: iter_time=9.427e-05, forward_time=0.147, loss_ctc=121.684, loss_att=132.872, acc=0.377, loss=129.516, backward_time=1.105, grad_norm=197.046, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.088e-04, train_time=1.597 +[gpub001:0/128] 2023-07-02 04:34:19,159 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 04:34:40,978 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 04:34:45,187 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 04:34:45,187 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 04:34:45,191 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 04:42:26,425 (trainer:732) INFO: 2epoch:train:401-500batch: iter_time=2.464, forward_time=0.183, loss_ctc=133.533, loss_att=141.745, acc=0.391, loss=139.281, backward_time=1.100, grad_norm=177.034, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.122, optim0_lr0=1.113e-04, train_time=4.951 +[gpub001:0/128] 2023-07-02 04:44:57,986 (trainer:732) INFO: 2epoch:train:501-600batch: iter_time=1.026e-04, forward_time=0.146, loss_ctc=117.575, loss_att=129.798, acc=0.436, loss=126.131, backward_time=1.087, grad_norm=157.194, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.138e-04, train_time=1.516 +[gpub001:0/128] 2023-07-02 04:47:30,329 (trainer:732) INFO: 2epoch:train:601-700batch: iter_time=1.003e-04, forward_time=0.146, loss_ctc=114.442, loss_att=114.728, acc=0.414, loss=114.642, backward_time=1.082, grad_norm=169.206, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.163e-04, train_time=1.523 +[gpub001:0/128] 2023-07-02 04:50:12,389 (trainer:732) INFO: 2epoch:train:701-800batch: iter_time=9.729e-05, forward_time=0.145, loss_ctc=117.962, loss_att=130.095, acc=0.398, loss=126.455, backward_time=1.107, grad_norm=202.411, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.188e-04, train_time=1.620 +[gpub001:0/128] 2023-07-02 04:50:19,407 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 04:50:41,623 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 04:50:45,945 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 04:50:45,945 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 04:50:45,949 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 04:58:45,325 (trainer:732) INFO: 2epoch:train:801-900batch: iter_time=1.604, forward_time=0.186, loss_ctc=126.010, loss_att=132.556, acc=0.414, loss=130.592, backward_time=1.105, grad_norm=175.243, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.213e-04, train_time=5.129 +[gpub001:0/128] 2023-07-02 05:01:17,736 (trainer:732) INFO: 2epoch:train:901-1000batch: iter_time=9.383e-05, forward_time=0.149, loss_ctc=109.712, loss_att=118.287, acc=0.466, loss=115.715, backward_time=1.088, grad_norm=142.149, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.238e-04, train_time=1.524 +[gpub001:0/128] 2023-07-02 05:03:47,575 (trainer:732) INFO: 2epoch:train:1001-1100batch: iter_time=1.014e-04, forward_time=0.147, loss_ctc=110.053, loss_att=106.480, acc=0.439, loss=107.552, backward_time=1.086, grad_norm=153.951, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.263e-04, train_time=1.498 +[gpub001:0/128] 2023-07-02 05:06:16,384 (trainer:732) INFO: 2epoch:train:1101-1200batch: iter_time=9.267e-05, forward_time=0.147, loss_ctc=107.375, loss_att=120.439, acc=0.425, loss=116.520, backward_time=1.082, grad_norm=193.914, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.288e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 05:06:27,229 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 05:06:49,654 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 05:06:54,182 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 05:06:54,182 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 05:06:54,186 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 05:13:53,395 (trainer:732) INFO: 2epoch:train:1201-1300batch: iter_time=2.083, forward_time=0.188, loss_ctc=119.429, loss_att=122.178, acc=0.442, loss=121.353, backward_time=1.161, grad_norm=211.966, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.122, optim0_lr0=1.313e-04, train_time=4.569 +[gpub001:0/128] 2023-07-02 05:16:51,682 (trainer:732) INFO: 2epoch:train:1301-1400batch: iter_time=8.730e-05, forward_time=0.148, loss_ctc=105.735, loss_att=111.683, acc=0.485, loss=109.899, backward_time=1.179, grad_norm=138.234, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.338e-04, train_time=1.784 +[gpub001:0/128] 2023-07-02 05:19:35,325 (trainer:732) INFO: 2epoch:train:1401-1500batch: iter_time=9.301e-05, forward_time=0.145, loss_ctc=104.633, loss_att=99.671, acc=0.460, loss=101.159, backward_time=1.106, grad_norm=170.937, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.363e-04, train_time=1.636 +[gpub001:0/128] 2023-07-02 05:22:31,703 (trainer:732) INFO: 2epoch:train:1501-1600batch: iter_time=9.055e-05, forward_time=0.146, loss_ctc=102.772, loss_att=110.998, acc=0.455, loss=108.530, backward_time=1.109, grad_norm=160.641, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.388e-04, train_time=1.764 +[gpub001:0/128] 2023-07-02 05:22:33,982 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 05:22:56,271 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 05:23:00,511 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 05:23:00,511 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 05:23:00,515 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 05:29:35,230 (trainer:732) INFO: 2epoch:train:1601-1700batch: iter_time=1.551, forward_time=0.149, loss_ctc=113.295, loss_att=115.104, acc=0.459, loss=114.561, backward_time=1.108, grad_norm=155.653, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.413e-04, train_time=4.235 +[gpub001:0/128] 2023-07-02 05:32:13,472 (trainer:732) INFO: 2epoch:train:1701-1800batch: iter_time=1.052e-04, forward_time=0.146, loss_ctc=103.800, loss_att=106.712, acc=0.499, loss=105.838, backward_time=1.096, grad_norm=148.694, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.438e-04, train_time=1.582 +[gpub001:0/128] 2023-07-02 05:34:44,526 (trainer:732) INFO: 2epoch:train:1801-1900batch: iter_time=1.072e-04, forward_time=0.147, loss_ctc=101.364, loss_att=92.579, acc=0.479, loss=95.215, backward_time=1.082, grad_norm=163.286, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.463e-04, train_time=1.510 +[gpub001:0/128] 2023-07-02 05:37:18,235 (trainer:732) INFO: 2epoch:train:1901-2000batch: iter_time=9.932e-05, forward_time=0.146, loss_ctc=102.646, loss_att=106.142, acc=0.463, loss=105.093, backward_time=1.095, grad_norm=182.571, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.488e-04, train_time=1.537 +[gpub001:0/128] 2023-07-02 05:37:19,899 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub001:0/128] 2023-07-02 05:37:42,251 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 05:37:46,475 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 05:37:46,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 05:37:46,479 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 05:48:10,631 (trainer:732) INFO: 2epoch:train:2001-2100batch: iter_time=1.530, forward_time=0.146, loss_ctc=116.459, loss_att=113.836, acc=0.467, loss=114.623, backward_time=1.101, grad_norm=190.683, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.513e-04, train_time=6.524 +[gpub001:0/128] 2023-07-02 05:50:53,948 (trainer:732) INFO: 2epoch:train:2101-2200batch: iter_time=9.630e-05, forward_time=0.149, loss_ctc=98.615, loss_att=98.760, acc=0.520, loss=98.716, backward_time=1.108, grad_norm=133.124, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.538e-04, train_time=1.633 +[gpub001:0/128] 2023-07-02 05:53:22,527 (trainer:732) INFO: 2epoch:train:2201-2300batch: iter_time=1.008e-04, forward_time=0.146, loss_ctc=96.354, loss_att=87.441, acc=0.497, loss=90.115, backward_time=1.080, grad_norm=137.996, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.563e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 05:55:51,147 (trainer:732) INFO: 2epoch:train:2301-2400batch: iter_time=9.314e-05, forward_time=0.147, loss_ctc=97.507, loss_att=99.440, acc=0.485, loss=98.860, backward_time=1.081, grad_norm=204.800, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.588e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 05:55:53,085 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/128] 2023-07-02 05:56:15,243 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 05:56:19,560 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 05:56:19,560 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 05:56:19,564 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 06:03:08,333 (trainer:732) INFO: 2epoch:train:2401-2500batch: iter_time=1.611, forward_time=0.178, loss_ctc=108.028, loss_att=109.475, acc=0.488, loss=109.041, backward_time=1.104, grad_norm=160.868, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.122, optim0_lr0=1.613e-04, train_time=4.371 +[gpub001:0/128] 2023-07-02 06:05:38,294 (trainer:732) INFO: 2epoch:train:2501-2600batch: iter_time=9.944e-05, forward_time=0.147, loss_ctc=100.030, loss_att=100.384, acc=0.532, loss=100.278, backward_time=1.086, grad_norm=166.489, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.638e-04, train_time=1.500 +[gpub001:0/128] 2023-07-02 06:08:07,446 (trainer:732) INFO: 2epoch:train:2601-2700batch: iter_time=1.010e-04, forward_time=0.147, loss_ctc=95.719, loss_att=86.792, acc=0.515, loss=89.470, backward_time=1.082, grad_norm=137.126, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.663e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 06:10:36,538 (trainer:732) INFO: 2epoch:train:2701-2800batch: iter_time=1.025e-04, forward_time=0.148, loss_ctc=95.055, loss_att=99.392, acc=0.503, loss=98.091, backward_time=1.084, grad_norm=141.816, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.688e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 06:10:46,481 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 06:11:08,639 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 06:11:12,893 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 06:11:12,893 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 06:11:12,897 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 06:18:12,047 (trainer:732) INFO: 2epoch:train:2801-2900batch: iter_time=2.408, forward_time=0.148, loss_ctc=105.117, loss_att=104.174, acc=0.506, loss=104.457, backward_time=1.103, grad_norm=133.346, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.713e-04, train_time=4.555 +[gpub001:0/128] 2023-07-02 06:20:42,419 (trainer:732) INFO: 2epoch:train:2901-3000batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=97.400, loss_att=94.758, acc=0.546, loss=95.551, backward_time=1.086, grad_norm=188.040, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.738e-04, train_time=1.504 +[gpub001:0/128] 2023-07-02 06:23:24,163 (trainer:732) INFO: 2epoch:train:3001-3100batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=98.565, loss_att=87.186, acc=0.519, loss=90.599, backward_time=1.106, grad_norm=160.472, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.763e-04, train_time=1.617 +[gpub001:0/128] 2023-07-02 06:25:58,596 (trainer:732) INFO: 2epoch:train:3101-3200batch: iter_time=9.116e-05, forward_time=0.146, loss_ctc=94.556, loss_att=97.305, acc=0.507, loss=96.480, backward_time=1.088, grad_norm=152.535, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.788e-04, train_time=1.544 +[gpub001:0/128] 2023-07-02 06:26:00,561 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 06:26:23,005 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 06:26:27,267 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 06:26:27,267 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 06:26:27,271 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 06:33:58,814 (trainer:732) INFO: 2epoch:train:3201-3300batch: iter_time=1.523, forward_time=0.163, loss_ctc=105.148, loss_att=101.685, acc=0.514, loss=102.724, backward_time=1.128, grad_norm=146.660, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.122, optim0_lr0=1.813e-04, train_time=4.802 +[gpub001:0/128] 2023-07-02 06:36:43,188 (trainer:732) INFO: 2epoch:train:3301-3400batch: iter_time=8.705e-05, forward_time=0.148, loss_ctc=93.198, loss_att=91.092, acc=0.554, loss=91.724, backward_time=1.095, grad_norm=154.320, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.838e-04, train_time=1.644 +[gpub001:0/128] 2023-07-02 06:39:35,381 (trainer:732) INFO: 2epoch:train:3401-3500batch: iter_time=9.148e-05, forward_time=0.145, loss_ctc=93.879, loss_att=82.609, acc=0.528, loss=85.990, backward_time=1.097, grad_norm=142.568, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.863e-04, train_time=1.722 +[gpub001:0/128] 2023-07-02 06:42:04,168 (trainer:732) INFO: 2epoch:train:3501-3600batch: iter_time=8.728e-05, forward_time=0.146, loss_ctc=91.983, loss_att=91.456, acc=0.522, loss=91.614, backward_time=1.081, grad_norm=151.785, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.888e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 06:42:05,883 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 06:42:28,495 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 06:42:32,765 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 06:42:32,765 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpub001:0/128] 2023-07-02 06:42:32,769 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 06:48:18,823 (trainer:732) INFO: 2epoch:train:3601-3700batch: iter_time=1.599, forward_time=0.170, loss_ctc=105.844, loss_att=98.496, acc=0.521, loss=100.700, backward_time=1.106, grad_norm=150.808, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.913e-04, train_time=3.746 +[gpub001:0/128] 2023-07-02 06:50:50,784 (trainer:732) INFO: 2epoch:train:3701-3800batch: iter_time=9.644e-05, forward_time=0.146, loss_ctc=94.459, loss_att=88.622, acc=0.558, loss=90.373, backward_time=1.084, grad_norm=127.971, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.938e-04, train_time=1.520 +[gpub001:0/128] 2023-07-02 06:53:19,863 (trainer:732) INFO: 2epoch:train:3801-3900batch: iter_time=9.916e-05, forward_time=0.146, loss_ctc=90.685, loss_att=77.556, acc=0.547, loss=81.494, backward_time=1.082, grad_norm=162.079, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.963e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 06:55:49,487 (trainer:732) INFO: 2epoch:train:3901-4000batch: iter_time=9.854e-05, forward_time=0.145, loss_ctc=93.244, loss_att=90.302, acc=0.527, loss=91.184, backward_time=1.080, grad_norm=140.916, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.988e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 07:05:45,651 (trainer:338) INFO: 2epoch results: [train] iter_time=0.448, forward_time=0.153, loss_ctc=106.978, loss_att=107.457, acc=0.475, loss=107.313, backward_time=1.099, grad_norm=162.472, clip=100.000, loss_scale=3.932e+05, optim_step_time=0.121, optim0_lr0=1.500e-04, train_time=2.323, time=2 hours, 35 minutes and 3.24 seconds, total_count=8000, gpu_max_cached_mem_GB=37.209, [valid] loss_ctc=97.384, cer_ctc=0.435, loss_att=89.451, acc=0.424, cer=0.559, wer=1.000, loss=91.831, time=3 minutes and 54.37 seconds, total_count=1012, gpu_max_cached_mem_GB=37.209, [att_plot] time=5 minutes and 50.55 seconds, total_count=0, gpu_max_cached_mem_GB=37.209 +[gpub001:0/128] 2023-07-02 07:06:05,363 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 07:06:05,432 (trainer:272) INFO: 3/100epoch started. Estimated time to finish: 1 week, 4 days and 2 hours +[gpub001:0/128] 2023-07-02 07:06:06,952 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
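"The best model has been updated: valid.acc, valid.total_count" records that this epoch's checkpoint beat the previous best on those tracked criteria (valid.acc rose from 0.275 after epoch 1 to 0.424 here). A sketch of that kind of best-criterion bookkeeping; the names are illustrative rather than ESPnet's internals:

    best_so_far: dict[str, float] = {}

    def improves(name: str, value: float,
                 higher_is_better: bool = True) -> bool:
        # True when `value` beats the stored best, in which case the
        # caller would also snapshot the current checkpoint.
        prev = best_so_far.get(name)
        better = prev is None or (
            value > prev if higher_is_better else value < prev)
        if better:
            best_so_far[name] = value
        return better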
+[gpub001:0/128] 2023-07-02 07:06:28,662 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 07:06:35,113 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 07:06:35,113 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 07:06:35,236 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 07:14:50,669 (trainer:732) INFO: 3epoch:train:1-100batch: iter_time=3.597, forward_time=0.184, loss_ctc=99.267, loss_att=94.412, acc=0.525, loss=95.868, backward_time=1.110, grad_norm=176.962, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.123, optim0_lr0=2.013e-04, train_time=5.245 +[gpub001:0/128] 2023-07-02 07:17:25,258 (trainer:732) INFO: 3epoch:train:101-200batch: iter_time=9.299e-05, forward_time=0.157, loss_ctc=113.344, loss_att=105.667, acc=0.511, loss=107.970, backward_time=1.096, grad_norm=200.073, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.038e-04, train_time=1.546 +[gpub001:0/128] 2023-07-02 07:20:08,475 (trainer:732) INFO: 3epoch:train:201-300batch: iter_time=9.056e-05, forward_time=0.183, loss_ctc=101.734, loss_att=111.837, acc=0.507, loss=108.806, backward_time=1.108, grad_norm=163.340, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.123, optim0_lr0=2.063e-04, train_time=1.632 +[gpub001:0/128] 2023-07-02 07:22:51,493 (trainer:732) INFO: 3epoch:train:301-400batch: iter_time=8.434e-05, forward_time=0.235, loss_ctc=104.299, loss_att=105.729, acc=0.534, loss=105.300, backward_time=1.113, grad_norm=189.640, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.125, optim0_lr0=2.088e-04, train_time=1.630 +[gpub001:0/128] 2023-07-02 07:23:00,414 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 07:23:22,231 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 07:23:26,426 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 07:23:26,426 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 07:23:26,430 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 07:29:39,256 (trainer:732) INFO: 3epoch:train:401-500batch: iter_time=1.897, forward_time=0.147, loss_ctc=97.563, loss_att=88.949, acc=0.535, loss=91.534, backward_time=1.113, grad_norm=154.738, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.113e-04, train_time=4.077 +[gpub001:0/128] 2023-07-02 07:32:09,205 (trainer:732) INFO: 3epoch:train:501-600batch: iter_time=9.931e-05, forward_time=0.146, loss_ctc=105.782, loss_att=97.985, acc=0.527, loss=100.324, backward_time=1.085, grad_norm=177.959, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.138e-04, train_time=1.499 +[gpub001:0/128] 2023-07-02 07:34:38,498 (trainer:732) INFO: 3epoch:train:601-700batch: iter_time=1.042e-04, forward_time=0.147, loss_ctc=99.455, loss_att=104.887, acc=0.524, loss=103.257, backward_time=1.082, grad_norm=128.974, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.163e-04, train_time=1.493 +[gpub001:0/128] 2023-07-02 07:37:07,052 (trainer:732) INFO: 3epoch:train:701-800batch: iter_time=9.803e-05, forward_time=0.146, loss_ctc=98.270, loss_att=100.056, acc=0.545, loss=99.520, backward_time=1.081, grad_norm=161.994, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.188e-04, train_time=1.485 +[gpub001:0/128] 2023-07-02 07:37:09,329 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 07:37:31,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 07:37:35,616 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 07:37:35,616 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 07:37:35,620 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 07:45:33,041 (trainer:732) INFO: 3epoch:train:801-900batch: iter_time=1.497, forward_time=0.145, loss_ctc=93.991, loss_att=84.725, acc=0.550, loss=87.505, backward_time=1.122, grad_norm=141.107, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.213e-04, train_time=5.060 +[gpub001:0/128] 2023-07-02 07:48:06,099 (trainer:732) INFO: 3epoch:train:901-1000batch: iter_time=9.286e-05, forward_time=0.146, loss_ctc=104.802, loss_att=93.877, acc=0.536, loss=97.154, backward_time=1.088, grad_norm=185.883, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.238e-04, train_time=1.530 +[gpub001:0/128] 2023-07-02 07:50:42,566 (trainer:732) INFO: 3epoch:train:1001-1100batch: iter_time=9.304e-05, forward_time=0.145, loss_ctc=98.120, loss_att=102.667, acc=0.530, loss=101.303, backward_time=1.094, grad_norm=167.675, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.263e-04, train_time=1.564 +[gpub001:0/128] 2023-07-02 07:53:11,245 (trainer:732) INFO: 3epoch:train:1101-1200batch: iter_time=8.833e-05, forward_time=0.144, loss_ctc=100.707, loss_att=99.351, acc=0.551, loss=99.758, backward_time=1.081, grad_norm=199.649, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.288e-04, train_time=1.487 +[gpub001:0/128] 2023-07-02 07:53:12,995 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 07:53:35,401 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 07:53:39,690 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 07:53:39,690 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 07:53:39,694 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 07:59:30,939 (trainer:732) INFO: 3epoch:train:1201-1300batch: iter_time=1.559, forward_time=0.174, loss_ctc=94.186, loss_att=87.170, acc=0.539, loss=89.275, backward_time=1.106, grad_norm=328.801, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.313e-04, train_time=3.796 +[gpub001:0/128] 2023-07-02 08:02:05,406 (trainer:732) INFO: 3epoch:train:1301-1400batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=99.263, loss_att=93.429, acc=0.537, loss=95.179, backward_time=1.098, grad_norm=148.362, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.338e-04, train_time=1.545 +[gpub001:0/128] 2023-07-02 08:04:34,756 (trainer:732) INFO: 3epoch:train:1401-1500batch: iter_time=1.014e-04, forward_time=0.146, loss_ctc=94.767, loss_att=98.356, acc=0.536, loss=97.279, backward_time=1.083, grad_norm=130.626, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.363e-04, train_time=1.493 +[gpub001:0/128] 2023-07-02 08:07:05,149 (trainer:732) INFO: 3epoch:train:1501-1600batch: iter_time=1.019e-04, forward_time=0.145, loss_ctc=100.064, loss_att=98.786, acc=0.540, loss=99.170, backward_time=1.089, grad_norm=163.999, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.388e-04, train_time=1.504 +[gpub001:0/128] 2023-07-02 08:07:16,192 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 08:07:38,844 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 08:07:43,426 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 08:07:43,426 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 08:07:43,431 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 08:15:07,803 (trainer:732) INFO: 3epoch:train:1601-1700batch: iter_time=2.782, forward_time=0.171, loss_ctc=91.055, loss_att=81.218, acc=0.556, loss=84.169, backward_time=1.110, grad_norm=128.434, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.122, optim0_lr0=2.413e-04, train_time=4.826 +[gpub001:0/128] 2023-07-02 08:17:37,685 (trainer:732) INFO: 3epoch:train:1701-1800batch: iter_time=1.161e-04, forward_time=0.149, loss_ctc=98.061, loss_att=89.586, acc=0.548, loss=92.128, backward_time=1.082, grad_norm=146.044, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.438e-04, train_time=1.499 +[gpub001:0/128] 2023-07-02 08:20:09,607 (trainer:732) INFO: 3epoch:train:1801-1900batch: iter_time=1.026e-04, forward_time=0.146, loss_ctc=98.893, loss_att=99.335, acc=0.537, loss=99.202, backward_time=1.081, grad_norm=155.831, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.463e-04, train_time=1.519 +[gpub001:0/128] 2023-07-02 08:22:38,993 (trainer:732) INFO: 3epoch:train:1901-2000batch: iter_time=1.062e-04, forward_time=0.147, loss_ctc=97.226, loss_att=98.121, acc=0.542, loss=97.853, backward_time=1.080, grad_norm=188.439, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.488e-04, train_time=1.494 +[gpub001:0/128] 2023-07-02 08:22:52,777 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub001:0/128] 2023-07-02 08:23:14,999 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 08:23:19,324 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 08:23:19,324 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 08:23:19,328 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 08:30:03,463 (trainer:732) INFO: 3epoch:train:2001-2100batch: iter_time=2.802, forward_time=0.146, loss_ctc=91.586, loss_att=82.817, acc=0.563, loss=85.448, backward_time=1.105, grad_norm=140.635, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.494e-04, train_time=4.444 +[gpub001:0/128] 2023-07-02 08:32:33,043 (trainer:732) INFO: 3epoch:train:2101-2200batch: iter_time=1.024e-04, forward_time=0.147, loss_ctc=98.382, loss_att=88.675, acc=0.561, loss=91.587, backward_time=1.083, grad_norm=161.427, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.481e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 08:35:02,072 (trainer:732) INFO: 3epoch:train:2201-2300batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=94.521, loss_att=95.214, acc=0.555, loss=95.006, backward_time=1.083, grad_norm=120.258, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.469e-04, train_time=1.490 +[gpub001:0/128] 2023-07-02 08:37:38,878 (trainer:732) INFO: 3epoch:train:2301-2400batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=95.535, loss_att=91.789, acc=0.581, loss=92.913, backward_time=1.089, grad_norm=130.439, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.457e-04, train_time=1.568 +[gpub001:0/128] 2023-07-02 08:37:40,534 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/128] 2023-07-02 08:38:03,105 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 08:38:07,445 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 08:38:07,445 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 08:38:07,449 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 08:44:55,772 (trainer:732) INFO: 3epoch:train:2401-2500batch: iter_time=1.576, forward_time=0.176, loss_ctc=88.566, loss_att=78.855, acc=0.571, loss=81.769, backward_time=1.099, grad_norm=125.630, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.122, optim0_lr0=2.445e-04, train_time=4.368 +[gpub001:0/128] 2023-07-02 08:47:25,999 (trainer:732) INFO: 3epoch:train:2501-2600batch: iter_time=1.039e-04, forward_time=0.146, loss_ctc=98.401, loss_att=88.608, acc=0.556, loss=91.546, backward_time=1.085, grad_norm=149.550, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.434e-04, train_time=1.502 +[gpub001:0/128] 2023-07-02 08:49:54,806 (trainer:732) INFO: 3epoch:train:2601-2700batch: iter_time=9.939e-05, forward_time=0.146, loss_ctc=92.927, loss_att=93.438, acc=0.553, loss=93.285, backward_time=1.081, grad_norm=122.391, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.422e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 08:52:23,625 (trainer:732) INFO: 3epoch:train:2701-2800batch: iter_time=9.931e-05, forward_time=0.146, loss_ctc=92.326, loss_att=92.323, acc=0.561, loss=92.324, backward_time=1.081, grad_norm=125.602, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.411e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 08:52:26,455 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 08:52:48,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 08:52:53,114 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 08:52:53,114 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpub001:0/128] 2023-07-02 08:52:53,118 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 08:59:50,440 (trainer:732) INFO: 3epoch:train:2801-2900batch: iter_time=1.553, forward_time=0.146, loss_ctc=93.235, loss_att=78.888, acc=0.571, loss=83.192, backward_time=1.117, grad_norm=128.780, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.400e-04, train_time=4.468 +[gpub001:0/128] 2023-07-02 09:02:28,341 (trainer:732) INFO: 3epoch:train:2901-3000batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=96.098, loss_att=83.970, acc=0.569, loss=87.609, backward_time=1.098, grad_norm=147.013, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.389e-04, train_time=1.579 +[gpub001:0/128] 2023-07-02 09:04:57,396 (trainer:732) INFO: 3epoch:train:3001-3100batch: iter_time=9.911e-05, forward_time=0.145, loss_ctc=91.842, loss_att=91.034, acc=0.560, loss=91.276, backward_time=1.081, grad_norm=132.321, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.378e-04, train_time=1.490 +[gpub001:0/128] 2023-07-02 09:07:27,650 (trainer:732) INFO: 3epoch:train:3101-3200batch: iter_time=9.178e-05, forward_time=0.145, loss_ctc=92.726, loss_att=90.434, acc=0.567, loss=91.122, backward_time=1.083, grad_norm=123.287, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.367e-04, train_time=1.502 +[gpub001:0/128] 2023-07-02 09:07:37,004 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 09:07:59,537 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 09:08:03,842 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 09:08:03,842 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 09:08:03,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 09:15:19,332 (trainer:732) INFO: 3epoch:train:3201-3300batch: iter_time=1.643, forward_time=0.155, loss_ctc=86.892, loss_att=74.836, acc=0.582, loss=78.453, backward_time=1.104, grad_norm=119.753, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.357e-04, train_time=4.717 +[gpub001:0/128] 2023-07-02 09:17:49,867 (trainer:732) INFO: 3epoch:train:3301-3400batch: iter_time=9.086e-05, forward_time=0.147, loss_ctc=94.866, loss_att=80.655, acc=0.580, loss=84.918, backward_time=1.083, grad_norm=133.801, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.346e-04, train_time=1.505 +[gpub001:0/128] 2023-07-02 09:20:19,435 (trainer:732) INFO: 3epoch:train:3401-3500batch: iter_time=9.423e-05, forward_time=0.145, loss_ctc=91.040, loss_att=90.233, acc=0.562, loss=90.475, backward_time=1.080, grad_norm=122.931, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.336e-04, train_time=1.495 +[gpub001:0/128] 2023-07-02 09:22:47,882 (trainer:732) INFO: 3epoch:train:3501-3600batch: iter_time=9.105e-05, forward_time=0.145, loss_ctc=90.252, loss_att=86.651, acc=0.579, loss=87.731, backward_time=1.080, grad_norm=128.843, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.326e-04, train_time=1.484 +[gpub001:0/128] 2023-07-02 09:22:49,908 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 09:23:12,184 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 09:23:16,491 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 09:23:16,491 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 09:23:16,495 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 09:29:14,252 (trainer:732) INFO: 3epoch:train:3601-3700batch: iter_time=2.007, forward_time=0.173, loss_ctc=86.671, loss_att=74.898, acc=0.593, loss=78.429, backward_time=1.111, grad_norm=109.421, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.122, optim0_lr0=2.316e-04, train_time=3.863 +[gpub001:0/128] 2023-07-02 09:31:55,295 (trainer:732) INFO: 3epoch:train:3701-3800batch: iter_time=1.082e-04, forward_time=0.146, loss_ctc=95.586, loss_att=81.639, acc=0.588, loss=85.823, backward_time=1.093, grad_norm=144.015, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.306e-04, train_time=1.610 +[gpub001:0/128] 2023-07-02 09:34:33,919 (trainer:732) INFO: 3epoch:train:3801-3900batch: iter_time=8.169e-05, forward_time=0.145, loss_ctc=88.969, loss_att=87.806, acc=0.582, loss=88.155, backward_time=1.090, grad_norm=123.158, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.296e-04, train_time=1.586 +[gpub001:0/128] 2023-07-02 09:37:16,204 (trainer:732) INFO: 3epoch:train:3901-4000batch: iter_time=7.505e-05, forward_time=0.146, loss_ctc=90.182, loss_att=85.377, acc=0.604, loss=86.819, backward_time=1.099, grad_norm=111.405, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.287e-04, train_time=1.623 +[gpub001:0/128] 2023-07-02 09:46:42,576 (trainer:338) INFO: 3epoch results: [train] iter_time=0.523, forward_time=0.153, loss_ctc=96.036, loss_att=91.357, acc=0.554, loss=92.761, backward_time=1.093, grad_norm=150.980, clip=100.000, loss_scale=1.573e+06, optim_step_time=0.121, optim0_lr0=2.318e-04, train_time=2.267, time=2 hours, 31 minutes and 22.78 seconds, total_count=12000, gpu_max_cached_mem_GB=37.209, [valid] loss_ctc=94.999, cer_ctc=0.408, loss_att=80.741, acc=0.476, cer=0.524, wer=1.000, loss=85.019, time=3 minutes and 26.69 seconds, total_count=1518, gpu_max_cached_mem_GB=37.209, [att_plot] time=5 minutes and 47.46 seconds, total_count=0, gpu_max_cached_mem_GB=37.209 +[gpub001:0/128] 2023-07-02 09:46:57,926 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 09:46:57,928 (trainer:272) INFO: 4/100epoch started. Estimated time to finish: 1 week, 3 days and 22 hours +[gpub001:0/128] 2023-07-02 09:46:57,931 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/128] 2023-07-02 09:47:20,132 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 09:47:24,739 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 09:47:24,739 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 09:47:24,743 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 09:52:16,542 (trainer:732) INFO: 4epoch:train:1-100batch: iter_time=1.603, forward_time=0.147, loss_ctc=98.611, loss_att=79.913, acc=0.566, loss=85.523, backward_time=1.107, grad_norm=126.977, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.277e-04, train_time=3.186 +[gpub001:0/128] 2023-07-02 09:54:48,383 (trainer:732) INFO: 4epoch:train:101-200batch: iter_time=9.905e-05, forward_time=0.146, loss_ctc=93.292, loss_att=81.088, acc=0.588, loss=84.749, backward_time=1.087, grad_norm=140.335, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.268e-04, train_time=1.518 +[gpub001:0/128] 2023-07-02 09:57:28,030 (trainer:732) INFO: 4epoch:train:201-300batch: iter_time=1.015e-04, forward_time=0.144, loss_ctc=87.246, loss_att=71.450, acc=0.583, loss=76.189, backward_time=1.089, grad_norm=114.444, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.259e-04, train_time=1.596 +[gpub001:0/128] 2023-07-02 10:00:28,551 (trainer:732) INFO: 4epoch:train:301-400batch: iter_time=1.028e-04, forward_time=0.265, loss_ctc=98.629, loss_att=87.747, acc=0.596, loss=91.012, backward_time=1.121, grad_norm=129.662, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.124, optim0_lr0=2.249e-04, train_time=1.805 +[gpub001:0/128] 2023-07-02 10:00:31,903 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:00:53,880 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:00:58,136 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:00:58,136 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpub001:0/128] 2023-07-02 10:00:58,163 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 10:07:30,385 (trainer:732) INFO: 4epoch:train:401-500batch: iter_time=1.924, forward_time=0.160, loss_ctc=93.934, loss_att=78.095, acc=0.565, loss=82.847, backward_time=1.116, grad_norm=124.073, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.122, optim0_lr0=2.240e-04, train_time=4.218 +[gpub001:0/128] 2023-07-02 10:10:00,005 (trainer:732) INFO: 4epoch:train:501-600batch: iter_time=9.793e-05, forward_time=0.145, loss_ctc=95.139, loss_att=82.950, acc=0.579, loss=86.606, backward_time=1.084, grad_norm=150.418, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.231e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 10:12:28,946 (trainer:732) INFO: 4epoch:train:601-700batch: iter_time=9.726e-05, forward_time=0.145, loss_ctc=81.329, loss_att=68.312, acc=0.587, loss=72.217, backward_time=1.082, grad_norm=104.574, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.223e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 10:14:57,804 (trainer:732) INFO: 4epoch:train:701-800batch: iter_time=9.243e-05, forward_time=0.145, loss_ctc=98.008, loss_att=90.442, acc=0.585, loss=92.712, backward_time=1.081, grad_norm=138.562, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.214e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 10:15:11,245 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:15:34,137 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:15:38,468 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:15:38,468 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 10:15:38,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 10:20:48,872 (trainer:732) INFO: 4epoch:train:801-900batch: iter_time=1.797, forward_time=0.161, loss_ctc=94.104, loss_att=75.650, acc=0.575, loss=81.186, backward_time=1.104, grad_norm=142.424, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.205e-04, train_time=3.510 +[gpub001:0/128] 2023-07-02 10:23:20,613 (trainer:732) INFO: 4epoch:train:901-1000batch: iter_time=9.979e-05, forward_time=0.145, loss_ctc=91.794, loss_att=79.748, acc=0.589, loss=83.362, backward_time=1.095, grad_norm=112.707, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.197e-04, train_time=1.517 +[gpub001:0/128] 2023-07-02 10:25:49,266 (trainer:732) INFO: 4epoch:train:1001-1100batch: iter_time=1.043e-04, forward_time=0.145, loss_ctc=80.587, loss_att=68.929, acc=0.594, loss=72.427, backward_time=1.079, grad_norm=117.919, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.188e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 10:28:17,736 (trainer:732) INFO: 4epoch:train:1101-1200batch: iter_time=9.663e-05, forward_time=0.145, loss_ctc=96.761, loss_att=88.378, acc=0.589, loss=90.893, backward_time=1.081, grad_norm=118.353, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.180e-04, train_time=1.484 +[gpub001:0/128] 2023-07-02 10:28:19,612 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:28:41,496 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:28:45,961 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:28:45,961 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 10:28:45,965 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 10:34:43,152 (trainer:732) INFO: 4epoch:train:1201-1300batch: iter_time=1.606, forward_time=0.181, loss_ctc=91.554, loss_att=74.234, acc=0.591, loss=79.430, backward_time=1.106, grad_norm=113.931, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.172e-04, train_time=3.854 +[gpub001:0/128] 2023-07-02 10:37:13,480 (trainer:732) INFO: 4epoch:train:1301-1400batch: iter_time=7.940e-05, forward_time=0.146, loss_ctc=89.134, loss_att=77.040, acc=0.606, loss=80.668, backward_time=1.084, grad_norm=106.282, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.164e-04, train_time=1.503 +[gpub001:0/128] 2023-07-02 10:39:44,808 (trainer:732) INFO: 4epoch:train:1401-1500batch: iter_time=8.522e-05, forward_time=0.146, loss_ctc=78.916, loss_att=63.992, acc=0.613, loss=68.469, backward_time=1.088, grad_norm=96.651, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.156e-04, train_time=1.513 +[gpub001:0/128] 2023-07-02 10:42:13,272 (trainer:732) INFO: 4epoch:train:1501-1600batch: iter_time=7.945e-05, forward_time=0.145, loss_ctc=96.481, loss_att=83.388, acc=0.614, loss=87.316, backward_time=1.081, grad_norm=147.415, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.148e-04, train_time=1.484 +[gpub001:0/128] 2023-07-02 10:42:23,348 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:42:45,873 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:42:50,180 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:42:50,180 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 10:42:50,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 10:49:13,034 (trainer:732) INFO: 4epoch:train:1601-1700batch: iter_time=1.974, forward_time=0.173, loss_ctc=88.077, loss_att=72.109, acc=0.591, loss=76.900, backward_time=1.098, grad_norm=115.526, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.140e-04, train_time=4.197 +[gpub001:0/128] 2023-07-02 10:51:42,877 (trainer:732) INFO: 4epoch:train:1701-1800batch: iter_time=7.686e-05, forward_time=0.145, loss_ctc=88.207, loss_att=74.904, acc=0.604, loss=78.895, backward_time=1.083, grad_norm=104.521, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.132e-04, train_time=1.498 +[gpub001:0/128] 2023-07-02 10:54:14,248 (trainer:732) INFO: 4epoch:train:1801-1900batch: iter_time=7.721e-05, forward_time=0.145, loss_ctc=77.966, loss_att=64.135, acc=0.608, loss=68.284, backward_time=1.084, grad_norm=108.621, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.124e-04, train_time=1.513 +[gpub001:0/128] 2023-07-02 10:56:42,821 (trainer:732) INFO: 4epoch:train:1901-2000batch: iter_time=7.336e-05, forward_time=0.144, loss_ctc=93.915, loss_att=85.705, acc=0.603, loss=88.168, backward_time=1.082, grad_norm=116.908, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.117e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 10:56:44,596 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:57:06,859 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:57:11,122 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:57:11,123 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 10:57:11,126 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 11:04:53,694 (trainer:732) INFO: 4epoch:train:2001-2100batch: iter_time=1.699, forward_time=0.148, loss_ctc=88.188, loss_att=72.107, acc=0.591, loss=76.931, backward_time=1.102, grad_norm=116.420, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.109e-04, train_time=4.909 +[gpub001:0/128] 2023-07-02 11:07:24,536 (trainer:732) INFO: 4epoch:train:2101-2200batch: iter_time=1.028e-04, forward_time=0.148, loss_ctc=87.151, loss_att=74.177, acc=0.610, loss=78.069, backward_time=1.083, grad_norm=105.729, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.102e-04, train_time=1.508 +[gpub001:0/128] 2023-07-02 11:09:53,133 (trainer:732) INFO: 4epoch:train:2201-2300batch: iter_time=9.744e-05, forward_time=0.147, loss_ctc=79.577, loss_att=65.200, acc=0.609, loss=69.513, backward_time=1.080, grad_norm=100.328, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.094e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 11:12:23,324 (trainer:732) INFO: 4epoch:train:2301-2400batch: iter_time=9.173e-05, forward_time=0.147, loss_ctc=92.968, loss_att=84.240, acc=0.606, loss=86.858, backward_time=1.081, grad_norm=109.427, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.087e-04, train_time=1.502 +[gpub001:0/128] 2023-07-02 11:12:25,037 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/128] 2023-07-02 11:12:47,470 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 11:12:51,797 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 11:12:51,797 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 11:12:51,801 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 11:19:12,113 (trainer:732) INFO: 4epoch:train:2401-2500batch: iter_time=1.546, forward_time=0.177, loss_ctc=87.759, loss_att=70.861, acc=0.599, loss=75.930, backward_time=1.115, grad_norm=136.550, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.122, optim0_lr0=2.080e-04, train_time=4.087 +[gpub001:0/128] 2023-07-02 11:21:41,041 (trainer:732) INFO: 4epoch:train:2501-2600batch: iter_time=7.723e-05, forward_time=0.145, loss_ctc=86.882, loss_att=73.524, acc=0.613, loss=77.532, backward_time=1.082, grad_norm=106.358, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.072e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 11:24:18,226 (trainer:732) INFO: 4epoch:train:2601-2700batch: iter_time=8.031e-05, forward_time=0.144, loss_ctc=78.351, loss_att=64.279, acc=0.611, loss=68.501, backward_time=1.102, grad_norm=109.443, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.065e-04, train_time=1.572 +[gpub001:0/128] 2023-07-02 11:27:00,239 (trainer:732) INFO: 4epoch:train:2701-2800batch: iter_time=8.117e-05, forward_time=0.144, loss_ctc=93.410, loss_att=83.593, acc=0.610, loss=86.538, backward_time=1.102, grad_norm=114.063, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.058e-04, train_time=1.620 +[gpub001:0/128] 2023-07-02 11:27:05,797 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 11:27:27,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 11:27:32,128 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 11:27:32,128 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 11:27:32,132 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 11:34:05,899 (trainer:732) INFO: 4epoch:train:2801-2900batch: iter_time=2.044, forward_time=0.173, loss_ctc=87.698, loss_att=72.670, acc=0.606, loss=77.179, backward_time=1.104, grad_norm=115.150, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.051e-04, train_time=4.256 +[gpub001:0/128] 2023-07-02 11:36:36,429 (trainer:732) INFO: 4epoch:train:2901-3000batch: iter_time=9.052e-05, forward_time=0.147, loss_ctc=86.042, loss_att=73.284, acc=0.624, loss=77.111, backward_time=1.087, grad_norm=107.460, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.045e-04, train_time=1.505 +[gpub001:0/128] 2023-07-02 11:39:05,178 (trainer:732) INFO: 4epoch:train:3001-3100batch: iter_time=7.764e-05, forward_time=0.145, loss_ctc=76.124, loss_att=61.174, acc=0.628, loss=65.659, backward_time=1.081, grad_norm=99.575, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.038e-04, train_time=1.487 +[gpub001:0/128] 2023-07-02 11:41:33,800 (trainer:732) INFO: 4epoch:train:3101-3200batch: iter_time=7.370e-05, forward_time=0.144, loss_ctc=90.784, loss_att=79.546, acc=0.630, loss=82.917, backward_time=1.082, grad_norm=105.692, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.031e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 11:41:41,143 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 11:42:03,545 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 11:42:07,889 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 11:42:07,889 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 11:42:07,893 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 11:48:55,570 (trainer:732) INFO: 4epoch:train:3201-3300batch: iter_time=2.460, forward_time=0.170, loss_ctc=86.826, loss_att=70.191, acc=0.612, loss=75.182, backward_time=1.105, grad_norm=122.938, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.024e-04, train_time=4.417 +[gpub001:0/128] 2023-07-02 11:51:25,881 (trainer:732) INFO: 4epoch:train:3301-3400batch: iter_time=9.428e-05, forward_time=0.147, loss_ctc=85.421, loss_att=72.776, acc=0.625, loss=76.569, backward_time=1.086, grad_norm=116.810, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.018e-04, train_time=1.503 +[gpub001:0/128] 2023-07-02 11:53:54,354 (trainer:732) INFO: 4epoch:train:3401-3500batch: iter_time=8.940e-05, forward_time=0.146, loss_ctc=76.243, loss_att=61.089, acc=0.628, loss=65.635, backward_time=1.079, grad_norm=102.428, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.011e-04, train_time=1.485 +[gpub001:0/128] 2023-07-02 11:56:51,221 (trainer:732) INFO: 4epoch:train:3501-3600batch: iter_time=8.309e-05, forward_time=0.146, loss_ctc=90.417, loss_att=79.209, acc=0.635, loss=82.572, backward_time=1.115, grad_norm=107.842, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.005e-04, train_time=1.768 +[gpub001:0/128] 2023-07-02 11:56:54,712 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 11:57:16,883 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 11:57:21,231 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 11:57:21,231 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 11:57:21,235 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 12:04:28,387 (trainer:732) INFO: 4epoch:train:3601-3700batch: iter_time=1.574, forward_time=0.203, loss_ctc=87.050, loss_att=69.244, acc=0.619, loss=74.586, backward_time=1.115, grad_norm=102.912, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.123, optim0_lr0=1.998e-04, train_time=4.571 +[gpub001:0/128] 2023-07-02 12:06:58,225 (trainer:732) INFO: 4epoch:train:3701-3800batch: iter_time=9.426e-05, forward_time=0.147, loss_ctc=85.563, loss_att=72.811, acc=0.627, loss=76.637, backward_time=1.083, grad_norm=108.872, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=1.992e-04, train_time=1.499 +[gpub001:0/128] 2023-07-02 12:09:29,231 (trainer:732) INFO: 4epoch:train:3801-3900batch: iter_time=9.519e-05, forward_time=0.146, loss_ctc=76.919, loss_att=60.499, acc=0.633, loss=65.425, backward_time=1.079, grad_norm=100.011, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=1.986e-04, train_time=1.510 +[gpub001:0/128] 2023-07-02 12:11:59,799 (trainer:732) INFO: 4epoch:train:3901-4000batch: iter_time=8.595e-05, forward_time=0.146, loss_ctc=90.077, loss_att=77.617, acc=0.639, loss=81.355, backward_time=1.084, grad_norm=116.280, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=1.979e-04, train_time=1.505 +[gpub001:0/128] 2023-07-02 12:21:57,381 (trainer:338) INFO: 4epoch results: [train] iter_time=0.456, forward_time=0.155, loss_ctc=88.178, loss_att=74.657, acc=0.605, loss=78.714, backward_time=1.092, grad_norm=115.865, clip=100.000, loss_scale=6.291e+06, optim_step_time=0.121, optim0_lr0=2.118e-04, train_time=2.175, time=2 hours, 25 minutes and 14.26 seconds, total_count=16000, gpu_max_cached_mem_GB=37.211, [valid] loss_ctc=85.064, cer_ctc=0.401, loss_att=69.007, acc=0.517, cer=0.516, wer=1.000, loss=73.824, time=3 minutes and 47.38 seconds, total_count=2024, gpu_max_cached_mem_GB=37.211, [att_plot] time=5 minutes and 57.81 seconds, total_count=0, gpu_max_cached_mem_GB=37.211 +[gpub001:0/128] 2023-07-02 12:22:12,814 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 12:22:12,816 (trainer:272) INFO: 5/100epoch started. Estimated time to finish: 1 week, 3 days and 16 hours +[gpub001:0/128] 2023-07-02 12:22:12,819 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/128] 2023-07-02 12:22:35,284 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 12:22:41,131 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 12:22:41,131 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 12:22:42,017 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 12:31:54,897 (trainer:732) INFO: 5epoch:train:1-100batch: iter_time=4.188, forward_time=0.167, loss_ctc=95.831, loss_att=83.013, acc=0.566, loss=86.858, backward_time=1.104, grad_norm=128.232, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.973e-04, train_time=5.821 +[gpub001:0/128] 2023-07-02 12:34:31,184 (trainer:732) INFO: 5epoch:train:101-200batch: iter_time=7.943e-05, forward_time=0.145, loss_ctc=86.928, loss_att=65.908, acc=0.615, loss=72.214, backward_time=1.097, grad_norm=107.199, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.967e-04, train_time=1.563 +[gpub001:0/128] 2023-07-02 12:37:14,909 (trainer:732) INFO: 5epoch:train:201-300batch: iter_time=8.068e-05, forward_time=0.145, loss_ctc=82.643, loss_att=66.057, acc=0.622, loss=71.033, backward_time=1.123, grad_norm=117.355, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.961e-04, train_time=1.637 +[gpub001:0/128] 2023-07-02 12:39:43,827 (trainer:732) INFO: 5epoch:train:301-400batch: iter_time=8.139e-05, forward_time=0.146, loss_ctc=95.498, loss_att=80.022, acc=0.595, loss=84.665, backward_time=1.081, grad_norm=144.339, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.955e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 12:39:51,380 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 12:40:12,992 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 12:40:17,133 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 12:40:17,133 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 12:40:17,137 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 12:48:12,918 (trainer:732) INFO: 5epoch:train:401-500batch: iter_time=1.579, forward_time=0.147, loss_ctc=88.461, loss_att=76.736, acc=0.586, loss=80.254, backward_time=1.098, grad_norm=113.460, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.949e-04, train_time=5.091 +[gpub001:0/128] 2023-07-02 12:50:45,684 (trainer:732) INFO: 5epoch:train:501-600batch: iter_time=1.028e-04, forward_time=0.147, loss_ctc=85.023, loss_att=63.231, acc=0.628, loss=69.769, backward_time=1.084, grad_norm=107.817, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.943e-04, train_time=1.527 +[gpub001:0/128] 2023-07-02 12:53:13,961 (trainer:732) INFO: 5epoch:train:601-700batch: iter_time=1.013e-04, forward_time=0.144, loss_ctc=80.852, loss_att=65.135, acc=0.627, loss=69.850, backward_time=1.076, grad_norm=121.490, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.937e-04, train_time=1.483 +[gpub001:0/128] 2023-07-02 12:55:42,736 (trainer:732) INFO: 5epoch:train:701-800batch: iter_time=9.932e-05, forward_time=0.145, loss_ctc=94.110, loss_att=78.547, acc=0.604, loss=83.216, backward_time=1.079, grad_norm=120.640, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.932e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 12:55:44,316 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 12:56:07,456 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 12:56:11,744 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 12:56:11,744 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 12:56:11,748 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 13:02:22,172 (trainer:732) INFO: 5epoch:train:801-900batch: iter_time=1.509, forward_time=0.148, loss_ctc=88.112, loss_att=77.763, acc=0.577, loss=80.868, backward_time=1.137, grad_norm=106.985, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.926e-04, train_time=3.994 +[gpub001:0/128] 2023-07-02 13:04:50,899 (trainer:732) INFO: 5epoch:train:901-1000batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=83.503, loss_att=64.856, acc=0.623, loss=70.450, backward_time=1.080, grad_norm=105.383, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.920e-04, train_time=1.487 +[gpub001:0/128] 2023-07-02 13:07:19,780 (trainer:732) INFO: 5epoch:train:1001-1100batch: iter_time=1.212e-04, forward_time=0.148, loss_ctc=78.804, loss_att=62.053, acc=0.633, loss=67.078, backward_time=1.083, grad_norm=99.605, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.915e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 13:09:48,658 (trainer:732) INFO: 5epoch:train:1101-1200batch: iter_time=1.177e-04, forward_time=0.147, loss_ctc=91.446, loss_att=76.278, acc=0.600, loss=80.828, backward_time=1.080, grad_norm=117.116, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.909e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 13:09:50,669 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 13:10:12,649 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 13:10:17,106 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 13:10:17,106 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 13:10:17,110 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 13:16:06,569 (trainer:732) INFO: 5epoch:train:1201-1300batch: iter_time=1.607, forward_time=0.175, loss_ctc=85.120, loss_att=72.798, acc=0.593, loss=76.495, backward_time=1.101, grad_norm=103.506, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.122, optim0_lr0=1.903e-04, train_time=3.779 +[gpub001:0/128] 2023-07-02 13:18:36,586 (trainer:732) INFO: 5epoch:train:1301-1400batch: iter_time=1.060e-04, forward_time=0.145, loss_ctc=84.666, loss_att=64.196, acc=0.625, loss=70.337, backward_time=1.081, grad_norm=125.999, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.898e-04, train_time=1.500 +[gpub001:0/128] 2023-07-02 13:21:05,506 (trainer:732) INFO: 5epoch:train:1401-1500batch: iter_time=1.041e-04, forward_time=0.148, loss_ctc=76.989, loss_att=60.444, acc=0.639, loss=65.407, backward_time=1.082, grad_norm=93.583, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.892e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 13:23:34,157 (trainer:732) INFO: 5epoch:train:1501-1600batch: iter_time=8.877e-05, forward_time=0.147, loss_ctc=91.522, loss_att=76.267, acc=0.603, loss=80.844, backward_time=1.081, grad_norm=103.188, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.887e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 13:23:54,184 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 13:24:16,462 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 13:24:20,736 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 13:24:20,736 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 13:24:20,752 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 13:30:23,399 (trainer:732) INFO: 5epoch:train:1601-1700batch: iter_time=2.283, forward_time=0.146, loss_ctc=86.174, loss_att=73.323, acc=0.604, loss=77.178, backward_time=1.124, grad_norm=110.109, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.882e-04, train_time=4.092 +[gpub001:0/128] 2023-07-02 13:32:53,039 (trainer:732) INFO: 5epoch:train:1701-1800batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=81.258, loss_att=60.193, acc=0.643, loss=66.513, backward_time=1.081, grad_norm=124.281, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.876e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 13:35:25,312 (trainer:732) INFO: 5epoch:train:1801-1900batch: iter_time=1.034e-04, forward_time=0.147, loss_ctc=76.781, loss_att=61.004, acc=0.646, loss=65.737, backward_time=1.083, grad_norm=106.544, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.871e-04, train_time=1.523 +[gpub001:0/128] 2023-07-02 13:38:08,039 (trainer:732) INFO: 5epoch:train:1901-2000batch: iter_time=1.047e-04, forward_time=0.147, loss_ctc=89.325, loss_att=74.875, acc=0.619, loss=79.210, backward_time=1.121, grad_norm=109.861, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.866e-04, train_time=1.627 +[gpub001:0/128] 2023-07-02 13:38:12,719 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub001:0/128] 2023-07-02 13:38:34,773 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 13:38:39,038 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 13:38:39,038 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 13:38:39,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 13:45:41,717 (trainer:732) INFO: 5epoch:train:2001-2100batch: iter_time=1.533, forward_time=0.146, loss_ctc=86.087, loss_att=72.796, acc=0.601, loss=76.784, backward_time=1.099, grad_norm=114.369, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.861e-04, train_time=4.537 +[gpub001:0/128] 2023-07-02 13:48:14,220 (trainer:732) INFO: 5epoch:train:2101-2200batch: iter_time=1.082e-04, forward_time=0.145, loss_ctc=81.831, loss_att=60.613, acc=0.644, loss=66.978, backward_time=1.088, grad_norm=131.370, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.856e-04, train_time=1.525 +[gpub001:0/128] 2023-07-02 13:50:51,032 (trainer:732) INFO: 5epoch:train:2201-2300batch: iter_time=1.024e-04, forward_time=0.146, loss_ctc=77.795, loss_att=60.560, acc=0.646, loss=65.730, backward_time=1.089, grad_norm=113.810, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.851e-04, train_time=1.568 +[gpub001:0/128] 2023-07-02 13:53:32,141 (trainer:732) INFO: 5epoch:train:2301-2400batch: iter_time=1.003e-04, forward_time=0.147, loss_ctc=89.325, loss_att=72.509, acc=0.624, loss=77.553, backward_time=1.096, grad_norm=104.473, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.845e-04, train_time=1.611 +[gpub001:0/128] 2023-07-02 13:53:33,805 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/128] 2023-07-02 13:53:56,495 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 13:54:00,777 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 13:54:00,778 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 13:54:00,781 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 14:00:22,334 (trainer:732) INFO: 5epoch:train:2401-2500batch: iter_time=1.553, forward_time=0.186, loss_ctc=83.017, loss_att=72.643, acc=0.601, loss=75.755, backward_time=1.101, grad_norm=101.899, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.122, optim0_lr0=1.840e-04, train_time=4.102 +[gpub001:0/128] 2023-07-02 14:02:53,893 (trainer:732) INFO: 5epoch:train:2501-2600batch: iter_time=1.039e-04, forward_time=0.143, loss_ctc=82.586, loss_att=62.473, acc=0.636, loss=68.507, backward_time=1.082, grad_norm=109.136, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.835e-04, train_time=1.515 +[gpub001:0/128] 2023-07-02 14:05:22,448 (trainer:732) INFO: 5epoch:train:2601-2700batch: iter_time=1.084e-04, forward_time=0.144, loss_ctc=78.078, loss_att=60.349, acc=0.643, loss=65.668, backward_time=1.077, grad_norm=104.700, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.831e-04, train_time=1.485 +[gpub001:0/128] 2023-07-02 14:08:06,353 (trainer:732) INFO: 5epoch:train:2701-2800batch: iter_time=1.017e-04, forward_time=0.145, loss_ctc=88.355, loss_att=73.214, acc=0.612, loss=77.756, backward_time=1.092, grad_norm=114.943, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.826e-04, train_time=1.639 +[gpub001:0/128] 2023-07-02 14:08:14,829 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 14:08:36,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 14:08:41,179 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 14:08:41,179 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 14:08:41,183 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 14:16:05,822 (trainer:732) INFO: 5epoch:train:2801-2900batch: iter_time=1.639, forward_time=0.145, loss_ctc=84.011, loss_att=71.273, acc=0.614, loss=75.095, backward_time=1.103, grad_norm=125.591, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.821e-04, train_time=4.794 +[gpub001:0/128] 2023-07-02 14:18:39,318 (trainer:732) INFO: 5epoch:train:2901-3000batch: iter_time=2.634e-04, forward_time=0.168, loss_ctc=81.248, loss_att=59.851, acc=0.647, loss=66.270, backward_time=1.091, grad_norm=91.920, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.124, optim0_lr0=1.816e-04, train_time=1.535 +[gpub001:0/128] 2023-07-02 14:21:11,194 (trainer:732) INFO: 5epoch:train:3001-3100batch: iter_time=1.143e-04, forward_time=0.162, loss_ctc=75.675, loss_att=60.444, acc=0.652, loss=65.013, backward_time=1.084, grad_norm=93.222, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.811e-04, train_time=1.518 +[gpub001:0/128] 2023-07-02 14:23:48,791 (trainer:732) INFO: 5epoch:train:3101-3200batch: iter_time=1.141e-04, forward_time=0.164, loss_ctc=90.248, loss_att=72.932, acc=0.623, loss=78.127, backward_time=1.095, grad_norm=113.248, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.122, optim0_lr0=1.807e-04, train_time=1.576 +[gpub001:0/128] 2023-07-02 14:23:59,201 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 14:24:21,746 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 14:24:26,057 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 14:24:26,057 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 14:24:26,061 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 14:31:50,659 (trainer:732) INFO: 5epoch:train:3201-3300batch: iter_time=1.865, forward_time=0.167, loss_ctc=83.228, loss_att=69.985, acc=0.610, loss=73.958, backward_time=1.102, grad_norm=113.113, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.122, optim0_lr0=1.802e-04, train_time=4.818 +[gpub001:0/128] 2023-07-02 14:34:19,355 (trainer:732) INFO: 5epoch:train:3301-3400batch: iter_time=9.341e-05, forward_time=0.144, loss_ctc=80.645, loss_att=61.484, acc=0.643, loss=67.232, backward_time=1.079, grad_norm=94.831, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.797e-04, train_time=1.487 +[gpub001:0/128] 2023-07-02 14:36:52,892 (trainer:732) INFO: 5epoch:train:3401-3500batch: iter_time=9.983e-05, forward_time=0.145, loss_ctc=74.661, loss_att=58.553, acc=0.648, loss=63.385, backward_time=1.091, grad_norm=112.096, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.793e-04, train_time=1.535 +[gpub001:0/128] 2023-07-02 14:39:27,050 (trainer:732) INFO: 5epoch:train:3501-3600batch: iter_time=8.702e-05, forward_time=0.147, loss_ctc=87.700, loss_att=72.286, acc=0.618, loss=76.910, backward_time=1.091, grad_norm=108.539, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.788e-04, train_time=1.541 +[gpub001:0/128] 2023-07-02 14:39:34,517 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 14:39:57,079 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 14:40:01,384 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 14:40:01,385 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpub001:0/128] 2023-07-02 14:40:01,388 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 14:47:22,407 (trainer:732) INFO: 5epoch:train:3601-3700batch: iter_time=2.338, forward_time=0.146, loss_ctc=82.381, loss_att=70.294, acc=0.609, loss=73.920, backward_time=1.111, grad_norm=95.828, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.783e-04, train_time=4.753 +[gpub001:0/128] 2023-07-02 14:49:58,572 (trainer:732) INFO: 5epoch:train:3701-3800batch: iter_time=1.275e-04, forward_time=0.147, loss_ctc=78.720, loss_att=59.946, acc=0.648, loss=65.578, backward_time=1.090, grad_norm=91.671, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.779e-04, train_time=1.561 +[gpub001:0/128] 2023-07-02 14:52:32,393 (trainer:732) INFO: 5epoch:train:3801-3900batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=76.291, loss_att=59.368, acc=0.648, loss=64.445, backward_time=1.080, grad_norm=102.358, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.774e-04, train_time=1.538 +[gpub001:0/128] 2023-07-02 14:55:03,373 (trainer:732) INFO: 5epoch:train:3901-4000batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=86.476, loss_att=71.726, acc=0.622, loss=76.151, backward_time=1.081, grad_norm=105.900, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.770e-04, train_time=1.510 +[gpub001:0/128] 2023-07-02 15:04:31,604 (trainer:338) INFO: 5epoch results: [train] iter_time=0.502, forward_time=0.150, loss_ctc=84.285, loss_att=68.150, acc=0.621, loss=72.991, backward_time=1.092, grad_norm=110.243, clip=100.000, loss_scale=2.517e+07, optim_step_time=0.121, optim0_lr0=1.866e-04, train_time=2.292, time=2 hours, 33 minutes and 0.88 seconds, total_count=20000, gpu_max_cached_mem_GB=37.211, [valid] loss_ctc=72.987, cer_ctc=0.380, loss_att=58.253, acc=0.554, cer=0.483, wer=0.990, loss=62.673, time=3 minutes and 29.52 seconds, total_count=2530, gpu_max_cached_mem_GB=37.211, [att_plot] time=5 minutes and 48.38 seconds, total_count=0, gpu_max_cached_mem_GB=37.211 +[gpub001:0/128] 2023-07-02 15:04:47,134 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 15:04:47,137 (trainer:272) INFO: 6/100epoch started. Estimated time to finish: 1 week, 3 days and 14 hours +[gpub001:0/128] 2023-07-02 15:04:47,140 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/128] 2023-07-02 15:05:08,659 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 15:05:12,886 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 15:05:12,886 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 15:05:12,890 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 15:09:55,173 (trainer:732) INFO: 6epoch:train:1-100batch: iter_time=1.516, forward_time=0.173, loss_ctc=92.368, loss_att=76.121, acc=0.626, loss=80.995, backward_time=1.103, grad_norm=126.531, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.765e-04, train_time=3.080 +[gpub001:0/128] 2023-07-02 15:12:32,565 (trainer:732) INFO: 6epoch:train:101-200batch: iter_time=9.775e-05, forward_time=0.144, loss_ctc=83.143, loss_att=68.670, acc=0.613, loss=73.012, backward_time=1.090, grad_norm=97.187, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.761e-04, train_time=1.574 +[gpub001:0/128] 2023-07-02 15:15:24,315 (trainer:732) INFO: 6epoch:train:201-300batch: iter_time=1.016e-04, forward_time=0.145, loss_ctc=84.306, loss_att=66.722, acc=0.648, loss=71.997, backward_time=1.105, grad_norm=107.360, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.757e-04, train_time=1.717 +[gpub001:0/128] 2023-07-02 15:18:00,929 (trainer:732) INFO: 6epoch:train:301-400batch: iter_time=1.021e-04, forward_time=0.145, loss_ctc=91.913, loss_att=79.037, acc=0.636, loss=82.900, backward_time=1.090, grad_norm=110.113, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.752e-04, train_time=1.566 +[gpub001:0/128] 2023-07-02 15:18:17,251 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 15:18:39,584 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 15:18:43,772 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 15:18:43,772 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 15:18:43,861 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 15:25:56,982 (trainer:732) INFO: 6epoch:train:401-500batch: iter_time=2.079, forward_time=0.167, loss_ctc=92.693, loss_att=72.766, acc=0.624, loss=78.744, backward_time=1.096, grad_norm=118.287, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.748e-04, train_time=4.760 +[gpub001:0/128] 2023-07-02 15:28:27,102 (trainer:732) INFO: 6epoch:train:501-600batch: iter_time=1.140e-04, forward_time=0.145, loss_ctc=81.911, loss_att=68.694, acc=0.611, loss=72.659, backward_time=1.081, grad_norm=97.202, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.744e-04, train_time=1.501 +[gpub001:0/128] 2023-07-02 15:30:56,188 (trainer:732) INFO: 6epoch:train:601-700batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=83.558, loss_att=64.117, acc=0.651, loss=69.949, backward_time=1.080, grad_norm=106.948, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.740e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 15:33:25,358 (trainer:732) INFO: 6epoch:train:701-800batch: iter_time=1.124e-04, forward_time=0.145, loss_ctc=87.949, loss_att=76.437, acc=0.632, loss=79.891, backward_time=1.082, grad_norm=108.261, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.735e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 15:33:27,210 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 15:33:49,343 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 15:33:53,609 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 15:33:53,609 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 15:33:53,613 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 15:40:08,819 (trainer:732) INFO: 6epoch:train:801-900batch: iter_time=1.543, forward_time=0.145, loss_ctc=89.210, loss_att=70.814, acc=0.647, loss=76.333, backward_time=1.102, grad_norm=134.895, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.731e-04, train_time=4.034 +[gpub001:0/128] 2023-07-02 15:42:38,135 (trainer:732) INFO: 6epoch:train:901-1000batch: iter_time=1.010e-04, forward_time=0.145, loss_ctc=82.434, loss_att=67.557, acc=0.627, loss=72.020, backward_time=1.083, grad_norm=95.861, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.727e-04, train_time=1.493 +[gpub001:0/128] 2023-07-02 15:45:07,723 (trainer:732) INFO: 6epoch:train:1001-1100batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=81.188, loss_att=62.844, acc=0.667, loss=68.347, backward_time=1.083, grad_norm=93.331, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.723e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 15:47:38,402 (trainer:732) INFO: 6epoch:train:1101-1200batch: iter_time=1.003e-04, forward_time=0.147, loss_ctc=88.287, loss_att=74.520, acc=0.648, loss=78.650, backward_time=1.086, grad_norm=111.618, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.719e-04, train_time=1.507 +[gpub001:0/128] 2023-07-02 15:47:48,256 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 15:48:09,974 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 15:48:14,373 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 15:48:14,373 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 15:48:14,377 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 15:54:23,324 (trainer:732) INFO: 6epoch:train:1201-1300batch: iter_time=1.601, forward_time=0.159, loss_ctc=88.348, loss_att=73.412, acc=0.647, loss=77.893, backward_time=1.101, grad_norm=112.423, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.715e-04, train_time=4.049 +[gpub001:0/128] 2023-07-02 15:56:53,521 (trainer:732) INFO: 6epoch:train:1301-1400batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=79.386, loss_att=65.071, acc=0.634, loss=69.365, backward_time=1.082, grad_norm=105.505, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.711e-04, train_time=1.502 +[gpub001:0/128] 2023-07-02 15:59:22,392 (trainer:732) INFO: 6epoch:train:1401-1500batch: iter_time=1.308e-04, forward_time=0.148, loss_ctc=81.481, loss_att=62.703, acc=0.665, loss=68.336, backward_time=1.081, grad_norm=101.416, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.707e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 16:01:52,855 (trainer:732) INFO: 6epoch:train:1501-1600batch: iter_time=1.191e-04, forward_time=0.148, loss_ctc=87.263, loss_att=73.197, acc=0.653, loss=77.416, backward_time=1.084, grad_norm=107.970, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.703e-04, train_time=1.504 +[gpub001:0/128] 2023-07-02 16:01:58,402 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 16:02:20,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 16:02:24,729 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 16:02:24,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 16:02:24,733 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 16:07:49,090 (trainer:732) INFO: 6epoch:train:1601-1700batch: iter_time=1.671, forward_time=0.171, loss_ctc=90.466, loss_att=70.502, acc=0.651, loss=76.491, backward_time=1.106, grad_norm=124.483, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.123, optim0_lr0=1.699e-04, train_time=3.562 +[gpub001:0/128] 2023-07-02 16:10:20,476 (trainer:732) INFO: 6epoch:train:1701-1800batch: iter_time=9.879e-05, forward_time=0.146, loss_ctc=80.510, loss_att=64.788, acc=0.634, loss=69.505, backward_time=1.086, grad_norm=108.106, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.695e-04, train_time=1.514 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py:481: UserWarning: An error happens at loading "dump/raw/org/GigaST/XL.en-de/data/format.49/data_wav.ark:1438393521" + warnings.warn('An error happens at loading "{}"'.format(ark_name)) +ERROR:root:Error happened with path=exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5, type=kaldi_ark, id=GigaST_YOU0000008013_005722080_005750350_en_st_de +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 510, in train_one_epoch + for iiter, (utt_id, batch) in enumerate( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/reporter.py", line 267, in measure_iter_time + retval = next(iterator) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/iterators/multiple_iter_factory.py", line 35, in build_iter + yield from iter_factory.build_iter(epoch, shuffle) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 628, in __next__ + data = self._next_data() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1333, in 
_next_data + return self._process_data(data) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data + data.reraise() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_utils.py", line 543, in reraise + raise exception +PermissionError: Caught PermissionError in DataLoader worker process 1. +Original Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop + data = fetcher.fetch(index) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch + data = [self.dataset[idx] for idx in possibly_batched_index] + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp> + data = [self.dataset[idx] for idx in possibly_batched_index] + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/dataset.py", line 513, in __getitem__ + value = loader[uid] + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/dataset.py", line 52, in __getitem__ + retval = self.loader[key] + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py", line 479, in __getitem__ + return self._loader(ark_name) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/matio.py", line 235, in load_mat + fd_dict[ark] = open_like_kaldi(ark, "rb") + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py", line 207, in open_like_kaldi + return io.open(name, mode, encoding=encoding) +PermissionError: [Errno 13] Permission denied: 'dump/raw/org/GigaST/XL.en-de/data/format.49/data_wav.ark' + +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main + return _run_code(code, main_globals, None, + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code + exec(code, run_globals) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module> + main() + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main + S2TTask.main(cmd=cmd) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main + while not ProcessContext(processes, error_queues).join(): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join + raise ProcessExitedException( +torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1 +srun: error: gpub040: task 14: Exited with exit code 1 +slurmstepd: error: *** STEP 2115302.0 ON gpub001 CANCELLED AT 2023-07-02T16:14:40 *** diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log new file mode 100644 index
0000000000000000000000000000000000000000..acb0d4eb57452bcc77640f5c9b58a42ec38842b2 --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log @@ -0,0 +1,4663 @@ +# Running on gpub002.delta.ncsa.illinois.edu +# Started at Wed Jul 12 13:15:16 CDT 2023 +# SLURMD_NODENAME=gpub002 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2147805 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2147805 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[002,008,010-011,019,027-028,030,050-053,073-074,078,084]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[002,008,010-011,019,027-028,030,050-053,073-074,078,084]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2108111 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub002 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text 
--valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1
--valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe 
--multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +[gpub002:0/64] 2023-07-12 13:18:48,677 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub002:0/64] 2023-07-12 13:18:49,830 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. 
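The two INFO lines above are torch.distributed completing its store-based rendezvous across all 64 ranks (16 nodes x 4 GPUs per node), using the shared-file init method passed via --dist_init_method. A minimal sketch of that initialization, assuming one process per GPU; the path and the rank arithmetic are illustrative placeholders, not ESPnet's actual launcher code:

    import os
    import torch.distributed as dist

    # Illustrative rank bookkeeping: this job runs one Slurm task per node
    # (SLURM_PROCID = 0..15), and ESPnet's --multiprocessing_distributed
    # spawns 4 local processes per node, one per GPU.
    node_rank = int(os.environ.get("SLURM_PROCID", "0"))
    local_rank = 0  # 0..3, assigned per spawned process
    rank = node_rank * 4 + local_rank

    # file:// rendezvous: every rank blocks here until all 64 have checked
    # in through the shared file (the "store-based barrier" logged above).
    dist.init_process_group(
        backend="nccl",  # assumption; the usual backend for multi-GPU training
        init_method="file:///path/on/shared/fs/.dist_init_example",  # placeholder
        world_size=64,
        rank=rank,
    )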
+[gpub002:0/64] 2023-07-12 13:18:49,864 (s2t:483) INFO: Vocabulary size: 50002 +[gpub002:0/64] 2023-07-12 13:19:04,645 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
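Editor's note: the block repeated above (indices 0-23, i.e. the d24 in the experiment name) is a standard Transformer decoder layer: masked self-attention, cross-attention over the encoder output, and a 1024-4096-1024 feed-forward, each with LayerNorm (eps=1e-12) and dropout 0.1; the 50002-way ctc_lo presumably covers the bpe50000 vocabulary plus special symbols. As a reading aid, here is a minimal PyTorch sketch with the same printed shapes. This is a stand-in, not the ESPnet source (which fuses nothing and prints linear_q/k/v/out separately); the head count is not shown in this excerpt, so 16 is an assumption, as is the pre-LN residual ordering.

```python
# Minimal sketch of one decoder block matching the shapes logged above.
# Assumptions: 16 heads, pre-LN residual ordering (normalize_before=True).
import torch
import torch.nn as nn

class PositionwiseFeedForward(nn.Module):
    def __init__(self, d_model: int = 1024, d_ff: int = 4096, dropout: float = 0.1):
        super().__init__()
        self.w_1 = nn.Linear(d_model, d_ff)      # (w_1) in the dump
        self.w_2 = nn.Linear(d_ff, d_model)      # (w_2) in the dump
        self.dropout = nn.Dropout(dropout)
        self.activation = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.w_2(self.dropout(self.activation(self.w_1(x))))

class DecoderLayer(nn.Module):
    def __init__(self, d_model: int = 1024, n_heads: int = 16, dropout: float = 0.1):
        super().__init__()
        # nn.MultiheadAttention fuses the q/k/v/out projections that the log
        # prints as separate linear_q/linear_k/linear_v/linear_out modules.
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.src_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.feed_forward = PositionwiseFeedForward(d_model, 4 * d_model, dropout)
        self.norm1 = nn.LayerNorm(d_model, eps=1e-12)
        self.norm2 = nn.LayerNorm(d_model, eps=1e-12)
        self.norm3 = nn.LayerNorm(d_model, eps=1e-12)
        self.dropout = nn.Dropout(dropout)

    def forward(self, tgt, memory, tgt_mask=None):
        # Masked self-attention over the target prefix.
        x = self.norm1(tgt)
        x = tgt + self.dropout(self.self_attn(x, x, x, attn_mask=tgt_mask, need_weights=False)[0])
        # Cross-attention over the encoder output ("src_attn" in the dump).
        y = self.norm2(x)
        x = x + self.dropout(self.src_attn(y, memory, memory, need_weights=False)[0])
        # Position-wise feed-forward.
        return x + self.dropout(self.feed_forward(self.norm3(x)))
```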
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
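Editor's note: two quick consistency checks on the numbers above. The reported 3.55 GB is simply the float32 parameter count times 4 bytes (using 10^9 bytes per GB), and the printed lr of 2.5e-08 is what ESPnet's Noam-style WarmupLR (espnet2.schedulers.warmup_lr) yields at step 1 for the configured peak of 2.5e-4 with 10k warmup steps; re-deriving both is a cheap sanity check when resuming a run.

```python
# Pure arithmetic against the log lines above.
params = 888.51e6                      # "Total Number of model parameters: 888.51 M"
print(params * 4 / 1e9)                # float32 -> 3.554..., logged as "Size: 3.55 GB"

# WarmupLR schedule: lr(step) = base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5)
base_lr, warmup, step = 2.5e-4, 10_000, 1
print(base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5))  # 2.5e-08, as logged
```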
+[gpub002:0/64] 2023-07-12 13:19:04,667 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub002:0/64] 2023-07-12 13:19:05,366 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub002:0/64] 2023-07-12 13:19:13,983 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:19:14,187 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:19:14,187 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub002:0/64] 2023-07-12 13:19:14,194 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub002:0/64] 2023-07-12 13:19:14,680 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpub002:0/64] 2023-07-12 13:19:42,133 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub002:2108199:2108199 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:2108199:2108199 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:2108199:2108199 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub002:0/64] 2023-07-12 13:19:47,191 (trainer:284) INFO: 40/50epoch started
+[gpub002:0/64] 2023-07-12 13:19:47,237 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-12 13:20:04,995 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:20:08,308 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:20:08,308 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-12 13:20:08,314 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
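Editor's note: the [valid] and [plot_att] samplers above are built from the same key_file, so their counts should reconcile: 129,591 dev utterances grouped into 1,012 batches of roughly 128. (The plot_att factory then reports only N-batch=3 because the trainer draws attention plots for just a handful of utterances; 3 matching the usual num_att_plot default is our reading, not stated in the log.) A two-line check:

```python
# Reconciling the sampler lines above: same key file, two batch sizes.
utts, batches = 129_591, 1_012           # from the [plot_att] and [valid] lines
print(utts / batches)                    # 128.054..., logged (rounded) as mean=128.1
print(utts - batches * 128)              # 55 batches carry one extra utterance -> max=129
```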
+gpub011:1718215:1718290 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718215:1718290 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718215:1718290 [0] NCCL INFO comm 0x8e227720 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub011:1718216:1718216 [1] NCCL INFO cudaDriverVersion 12010 +gpub011:1718216:1718216 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718216:1718216 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718216:1718288 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718216:1718288 [1] NCCL INFO Using network IB +gpub011:1718216:1718288 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub011:1718216:1718288 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub011:1718216:1718288 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Connected all rings +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub011:1718216:1718288 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Connected all trees +gpub011:1718216:1718288 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718216:1718288 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718216:1718288 [1] NCCL INFO comm 0x9d351fa0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub011:1718218:1718218 [3] NCCL INFO cudaDriverVersion 12010 +gpub011:1718218:1718218 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718218:1718218 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718218:1718289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718218:1718289 [3] NCCL INFO Using network IB +gpub011:1718218:1718289 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub011:1718218:1718289 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub011:1718218:1718289 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub011:1718218:1718289 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub011:1718218:1718289 [3] NCCL INFO Connected all rings +gpub011:1718218:1718289 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub011:1718218:1718289 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub011:1718218:1718289 [3] NCCL INFO Connected all trees +gpub011:1718218:1718289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718218:1718289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718218:1718289 [3] NCCL INFO comm 0x4fae7090 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub084:95632:95632 [3] NCCL INFO cudaDriverVersion 12010 +gpub084:95632:95632 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95632:95632 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95632:95714 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> 
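Editor's note: every rank logs the same init recipe, so the blocks that follow can be skimmed: pick the IB/RoCE transport, pin CPU affinity, wire two channels of rings and trees, then report "Init COMPLETE" with its rank out of nranks 64 (16 nodes x 4 GPUs). The "Trees" entries appear to read children->rank->parent with -1 meaning "none", which is consistent with rank 12 above linking to 13, 8, 4 and 28. A throwaway helper for eyeballing the topology; the parser is ours and assumes only the line format shown here:

```python
# Parse per-channel tree entries out of lines like
#   "... NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28"
import re

TREE = re.compile(r"\[(\d+)\] (-?\d+)/(-?\d+)/(-?\d+)->(-?\d+)->(-?\d+)")

def parse_trees(line: str) -> dict:
    out = {}
    for ch, c1, c2, c3, rank, parent in TREE.findall(line):
        children = [int(c) for c in (c1, c2, c3) if int(c) != -1]
        out[int(ch)] = {"rank": int(rank), "parent": int(parent), "children": children}
    return out

line = "gpub011:1718215:1718290 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28"
print(parse_trees(line))
# {0: {'rank': 12, 'parent': 8, 'children': [13]},
#  1: {'rank': 12, 'parent': 28, 'children': [13, 4]}}
```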
+gpub084:95632:95714 [3] NCCL INFO Using network IB +gpub084:95632:95714 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub084:95632:95714 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub084:95632:95714 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub084:95632:95714 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub084:95632:95714 [3] NCCL INFO Connected all rings +gpub084:95632:95714 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub084:95632:95714 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub084:95632:95714 [3] NCCL INFO Connected all trees +gpub084:95632:95714 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95632:95714 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95632:95714 [3] NCCL INFO comm 0x9d28050 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub011:1718217:1718217 [2] NCCL INFO cudaDriverVersion 12010 +gpub011:1718217:1718217 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718217:1718217 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718217:1718291 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718217:1718291 [2] NCCL INFO Using network IB +gpub011:1718217:1718291 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub011:1718217:1718291 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub011:1718217:1718291 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Connected all rings +gpub011:1718217:1718291 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Connected all trees +gpub011:1718217:1718291 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718217:1718291 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718217:1718291 [2] NCCL INFO comm 0x50a009a0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub019:2611991:2611991 [2] NCCL INFO cudaDriverVersion 12010 +gpub019:2611991:2611991 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611991:2611991 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611991:2612065 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611991:2612065 [2] NCCL INFO Using network IB +gpub019:2611991:2612065 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub019:2611991:2612065 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub019:2611991:2612065 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Connected all rings +gpub019:2611991:2612065 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Connected all trees +gpub019:2611991:2612065 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611991:2612065 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
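Editor's note: the "Setting affinity" masks above look like comma-separated 32-bit hex words (most significant word first, one bit per CPU), in which case each of the four local GPUs is pinned to its own 16-core block of the node's 64 cores. A small decoding sketch under that assumption; the helper is ours, not NCCL's:

```python
# Decode cpuset masks such as "ffff0000,00000000" into CPU indices.
def cpus_from_mask(mask: str) -> list[int]:
    bits = int(mask.replace(",", ""), 16)      # treat words as one big integer
    return [i for i in range(bits.bit_length()) if bits >> i & 1]

print(cpus_from_mask("ffff"))                  # GPU 3 -> CPUs 0-15
print(cpus_from_mask("ffff0000"))              # GPU 2 -> CPUs 16-31
print(cpus_from_mask("ffff,00000000"))         # GPU 1 -> CPUs 32-47
print(cpus_from_mask("ffff0000,00000000"))     # GPU 0 -> CPUs 48-63
```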
+gpub019:2611991:2612065 [2] NCCL INFO comm 0x10048ab0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub084:95631:95631 [2] NCCL INFO cudaDriverVersion 12010 +gpub084:95631:95631 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95631:95631 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95631:95712 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:95631:95712 [2] NCCL INFO Using network IB +gpub084:95631:95712 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub084:95631:95712 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub084:95631:95712 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Connected all rings +gpub084:95631:95712 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Connected all trees +gpub084:95631:95712 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95631:95712 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95631:95712 [2] NCCL INFO comm 0x940c750 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:3855653:3855653 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:3855653:3855653 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855653:3855653 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855653:3855727 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855653:3855727 [1] NCCL INFO Using network IB +gpub074:3855653:3855727 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:3855653:3855727 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub074:3855653:3855727 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Connected all rings +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub074:3855653:3855727 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Connected all trees +gpub074:3855653:3855727 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855653:3855727 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855653:3855727 [1] NCCL INFO comm 0xaa1acf00 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub074:3855655:3855655 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:3855655:3855655 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855655:3855655 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855655:3855725 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855655:3855725 [3] NCCL INFO Using network IB +gpub074:3855655:3855725 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub074:3855655:3855725 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub074:3855655:3855725 [3] NCCL 
INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855655:3855725 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855655:3855725 [3] NCCL INFO Connected all rings +gpub074:3855655:3855725 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub074:3855655:3855725 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub074:3855655:3855725 [3] NCCL INFO Connected all trees +gpub074:3855655:3855725 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855655:3855725 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855655:3855725 [3] NCCL INFO comm 0x509a28d0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:3855652:3855652 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:3855652:3855652 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855652:3855652 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855652:3855726 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855652:3855726 [0] NCCL INFO Using network IB +gpub074:3855652:3855726 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:3855652:3855726 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub074:3855652:3855726 [0] NCCL INFO Connected all rings +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Connected all trees +gpub074:3855652:3855726 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855652:3855726 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855652:3855726 [0] NCCL INFO comm 0x8e164a10 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611989:2611989 [0] NCCL INFO cudaDriverVersion 12010 +gpub019:2611989:2611989 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611989:2611989 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611989:2612066 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611989:2612066 [0] NCCL INFO Using network IB +gpub019:2611989:2612066 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub019:2611989:2612066 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 
16[7000] -> 17[46000] via P2P/IPC +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub019:2611989:2612066 [0] NCCL INFO Connected all rings +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Connected all trees +gpub019:2611989:2612066 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611989:2612066 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611989:2612066 [0] NCCL INFO comm 0xa8ee89f0 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611992:2611992 [3] NCCL INFO cudaDriverVersion 12010 +gpub019:2611992:2611992 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611992:2611992 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611992:2612064 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611992:2612064 [3] NCCL INFO Using network IB +gpub019:2611992:2612064 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub019:2611992:2612064 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub019:2611992:2612064 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611992:2612064 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611992:2612064 [3] NCCL INFO Connected all rings +gpub019:2611992:2612064 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub019:2611992:2612064 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub019:2611992:2612064 [3] NCCL INFO Connected all trees +gpub019:2611992:2612064 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611992:2612064 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611992:2612064 [3] NCCL INFO comm 0x4fcf2500 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub028:3104067:3104067 [0] NCCL INFO cudaDriverVersion 12010 +gpub028:3104067:3104067 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104067:3104067 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104067:3104152 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104067:3104152 [0] NCCL INFO Using network IB +gpub028:3104067:3104152 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub028:3104067:3104152 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub028:3104067:3104152 [0] NCCL INFO Connected all rings +gpub008:2789793:2789793 [0] NCCL INFO cudaDriverVersion 
12010 +gpub008:2789793:2789793 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789793:2789793 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789793:2789871 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789793:2789871 [0] NCCL INFO Using network IB +gpub008:2789793:2789871 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub008:2789793:2789871 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub008:2789793:2789871 [0] NCCL INFO Connected all rings +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Connected all trees +gpub028:3104067:3104152 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:3104067:3104152 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:3104067:3104152 [0] NCCL INFO comm 0xa17fea0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Connected all trees +gpub008:2789793:2789871 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789793:2789871 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789793:2789871 [0] NCCL INFO comm 0x9e41e050 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub010:1746407:1746407 [0] NCCL INFO cudaDriverVersion 12010 +gpub010:1746407:1746407 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1746407:1746407 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1746407:1746486 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0> +gpub010:1746407:1746486 [0] NCCL INFO Using network IB +gpub010:1746407:1746486 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub010:1746407:1746486 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 
8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub010:1746407:1746486 [0] NCCL INFO Connected all rings +gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Connected all trees +gpub010:1746407:1746486 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub010:1746407:1746486 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1746407:1746486 [0] NCCL INFO comm 0xa1f0110 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub010:1746410:1746410 [3] NCCL INFO cudaDriverVersion 12010 +gpub010:1746410:1746410 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1746410:1746410 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1746410:1746485 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0> +gpub010:1746410:1746485 [3] NCCL INFO Using network IB +gpub010:1746410:1746485 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub010:1746410:1746485 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub010:1746410:1746485 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub010:1746410:1746485 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub010:1746410:1746485 [3] NCCL INFO Connected all rings +gpub010:1746410:1746485 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub010:1746410:1746485 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub010:1746410:1746485 [3] NCCL INFO Connected all trees +gpub010:1746410:1746485 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub010:1746410:1746485 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1746410:1746485 [3] NCCL INFO comm 0x95b8eb50 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub027:3834396:3834396 [0] NCCL INFO cudaDriverVersion 12010 +gpub027:3834396:3834396 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:3834396:3834396 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:3834396:3834476 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:3834396:3834476 [0] NCCL INFO Using network IB +gpub027:3834396:3834476 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub027:3834396:3834476 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub027:3834396:3834476 [0] NCCL INFO 
Connected all rings +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Connected all trees +gpub027:3834396:3834476 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:3834396:3834476 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:3834396:3834476 [0] NCCL INFO comm 0x8b8afd50 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611990:2611990 [1] NCCL INFO cudaDriverVersion 12010 +gpub019:2611990:2611990 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611990:2611990 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611990:2612063 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611990:2612063 [1] NCCL INFO Using network IB +gpub019:2611990:2612063 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub019:2611990:2612063 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub019:2611990:2612063 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub019:2611990:2612063 [1] NCCL INFO Connected all rings +gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub019:2611990:2612063 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub019:2611990:2612063 [1] NCCL INFO Connected all trees +gpub019:2611990:2612063 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611990:2612063 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611990:2612063 [1] NCCL INFO comm 0x8916a60 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub030:2867869:2867869 [0] NCCL INFO cudaDriverVersion 12010 +gpub030:2867869:2867869 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2867869:2867869 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2867869:2867948 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2867869:2867948 [0] NCCL INFO Using network IB +gpub030:2867869:2867948 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub030:2867869:2867948 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub030:2867869:2867948 [0] NCCL INFO Connected all rings +gpub030:2867869:2867948 [0] NCCL 
INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Connected all trees +gpub030:2867869:2867948 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2867869:2867948 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2867869:2867948 [0] NCCL INFO comm 0x236c1590 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub074:3855654:3855654 [2] NCCL INFO cudaDriverVersion 12010 +gpub074:3855654:3855654 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855654:3855654 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855654:3855724 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855654:3855724 [2] NCCL INFO Using network IB +gpub074:3855654:3855724 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub074:3855654:3855724 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub074:3855654:3855724 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub074:3855654:3855724 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub074:3855654:3855724 [2] NCCL INFO Connected all rings +gpub074:3855654:3855724 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub074:3855654:3855724 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub074:3855654:3855724 [2] NCCL INFO Connected all trees +gpub074:3855654:3855724 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855654:3855724 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855654:3855724 [2] NCCL INFO comm 0xba937820 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub030:2867872:2867872 [3] NCCL INFO cudaDriverVersion 12010 +gpub030:2867872:2867872 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2867872:2867872 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2867872:2867950 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2867872:2867950 [3] NCCL INFO Using network IB +gpub030:2867872:2867950 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub030:2867872:2867950 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub030:2867872:2867950 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub030:2867872:2867950 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub030:2867872:2867950 [3] NCCL INFO Connected all rings +gpub030:2867872:2867950 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub030:2867872:2867950 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub030:2867872:2867950 [3] NCCL INFO Connected all trees +gpub030:2867872:2867950 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2867872:2867950 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2867872:2867950 [3] NCCL INFO comm 0x8db50450 rank 
31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub078:387633:387633 [0] NCCL INFO cudaDriverVersion 12010 +gpub078:387633:387633 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:387633:387633 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:387633:387710 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:387633:387710 [0] NCCL INFO Using network IB +gpub078:387633:387710 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub078:387633:387710 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:387633:387710 [0] NCCL INFO Connected all rings +gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Connected all trees +gpub078:387633:387710 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:387633:387710 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:387633:387710 [0] NCCL INFO comm 0x8b083970 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub030:2867870:2867870 [1] NCCL INFO cudaDriverVersion 12010 +gpub030:2867870:2867870 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2867870:2867870 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2867870:2867949 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2867870:2867949 [1] NCCL INFO Using network IB +gpub030:2867870:2867949 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub030:2867870:2867949 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub030:2867870:2867949 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub030:2867870:2867949 [1] NCCL INFO Connected all rings +gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpub030:2867870:2867949 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub030:2867870:2867949 [1] NCCL INFO Connected all trees +gpub030:2867870:2867949 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2867870:2867949 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2867870:2867949 [1] NCCL INFO comm 0x9c29c010 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2789796:2789796 [3] NCCL INFO 
cudaDriverVersion 12010 +gpub008:2789796:2789796 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789796:2789796 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789796:2789872 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789796:2789872 [3] NCCL INFO Using network IB +gpub008:2789796:2789872 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub008:2789796:2789872 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub008:2789796:2789872 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub008:2789796:2789872 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub008:2789796:2789872 [3] NCCL INFO Connected all rings +gpub008:2789796:2789872 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub008:2789796:2789872 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub008:2789796:2789872 [3] NCCL INFO Connected all trees +gpub008:2789796:2789872 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789796:2789872 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789796:2789872 [3] NCCL INFO comm 0x50597af0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:2037082:2037082 [0] NCCL INFO cudaDriverVersion 12010 +gpub053:2037082:2037082 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:2037082:2037082 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:2037082:2037160 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:2037082:2037160 [0] NCCL INFO Using network IB +gpub053:2037082:2037160 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub053:2037082:2037160 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub053:2037082:2037160 [0] NCCL INFO Connected all rings +gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Connected all trees +gpub053:2037082:2037160 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:2037082:2037160 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:2037082:2037160 [0] NCCL INFO comm 0x50aa6090 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub027:3834399:3834399 [3] NCCL INFO cudaDriverVersion 12010 +gpub027:3834399:3834399 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:3834399:3834399 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:3834399:3834474 [3] NCCL 
INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:3834399:3834474 [3] NCCL INFO Using network IB +gpub027:3834399:3834474 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub027:3834399:3834474 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub027:3834399:3834474 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub027:3834399:3834474 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub027:3834399:3834474 [3] NCCL INFO Connected all rings +gpub027:3834399:3834474 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub027:3834399:3834474 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub027:3834399:3834474 [3] NCCL INFO Connected all trees +gpub027:3834399:3834474 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:3834399:3834474 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:3834399:3834474 [3] NCCL INFO comm 0x8f1f3890 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:2037083:2037083 [1] NCCL INFO cudaDriverVersion 12010 +gpub053:2037083:2037083 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:2037083:2037083 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:2037083:2037161 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:2037083:2037161 [1] NCCL INFO Using network IB +gpub053:2037083:2037161 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub053:2037083:2037161 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub053:2037083:2037161 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub053:2037083:2037161 [1] NCCL INFO Connected all rings +gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub053:2037083:2037161 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub053:2037083:2037161 [1] NCCL INFO Connected all trees +gpub053:2037083:2037161 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:2037083:2037161 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:2037083:2037161 [1] NCCL INFO comm 0x4f89c530 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2789795:2789795 [2] NCCL INFO cudaDriverVersion 12010 +gpub008:2789795:2789795 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789795:2789795 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789795:2789874 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789795:2789874 [2] NCCL INFO Using network IB +gpub008:2789795:2789874 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub008:2789795:2789874 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub008:2789795:2789874 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub008:2789795:2789874 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub008:2789795:2789874 [2] NCCL INFO Connected all rings +gpub008:2789795:2789874 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub008:2789795:2789874 [2] NCCL INFO 
Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub008:2789795:2789874 [2] NCCL INFO Connected all trees +gpub008:2789795:2789874 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789795:2789874 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789795:2789874 [2] NCCL INFO comm 0xb7cc7790 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:387636:387636 [3] NCCL INFO cudaDriverVersion 12010 +gpub078:387636:387636 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:387636:387636 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:387636:387711 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:387636:387711 [3] NCCL INFO Using network IB +gpub078:387636:387711 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub078:387636:387711 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub078:387636:387711 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:387636:387711 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:387636:387711 [3] NCCL INFO Connected all rings +gpub078:387636:387711 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:387636:387711 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:387636:387711 [3] NCCL INFO Connected all trees +gpub078:387636:387711 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:387636:387711 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:387636:387711 [3] NCCL INFO comm 0x50bf4280 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub073:748599:748599 [2] NCCL INFO cudaDriverVersion 12010 +gpub073:748599:748599 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:748599:748599 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:748599:748672 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:748599:748672 [2] NCCL INFO Using network IB +gpub073:748599:748672 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub073:748599:748672 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub073:748599:748672 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub073:748599:748672 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub073:748599:748672 [2] NCCL INFO Connected all rings +gpub073:748599:748672 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub073:748599:748672 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub073:748599:748672 [2] NCCL INFO Connected all trees +gpub073:748599:748672 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748599:748672 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748599:748672 [2] NCCL INFO comm 0xa2d1650 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub053:2037084:2037084 [2] NCCL INFO cudaDriverVersion 12010 +gpub053:2037084:2037084 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:2037084:2037084 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:2037084:2037163 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:2037084:2037163 [2] NCCL INFO Using network IB +gpub053:2037084:2037163 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 
+gpub053:2037084:2037163 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub053:2037084:2037163 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub053:2037084:2037163 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub053:2037084:2037163 [2] NCCL INFO Connected all rings +gpub053:2037084:2037163 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub053:2037084:2037163 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub053:2037084:2037163 [2] NCCL INFO Connected all trees +gpub053:2037084:2037163 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:2037084:2037163 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:2037084:2037163 [2] NCCL INFO comm 0x8c08e1a0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:387635:387635 [2] NCCL INFO cudaDriverVersion 12010 +gpub078:387635:387635 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:387635:387635 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:387635:387713 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:387635:387713 [2] NCCL INFO Using network IB +gpub078:387635:387713 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub078:387635:387713 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub078:387635:387713 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:387635:387713 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:387635:387713 [2] NCCL INFO Connected all rings +gpub078:387635:387713 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:387635:387713 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:387635:387713 [2] NCCL INFO Connected all trees +gpub078:387635:387713 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:387635:387713 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:387635:387713 [2] NCCL INFO comm 0x9a633940 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub028:3104070:3104070 [3] NCCL INFO cudaDriverVersion 12010 +gpub028:3104070:3104070 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104070:3104070 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104070:3104149 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104070:3104149 [3] NCCL INFO Using network IB +gpub028:3104070:3104149 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub028:3104070:3104149 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub028:3104070:3104149 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub028:3104070:3104149 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub028:3104070:3104149 [3] NCCL INFO Connected all rings +gpub028:3104070:3104149 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub028:3104070:3104149 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub028:3104070:3104149 [3] NCCL INFO Connected all trees +gpub028:3104070:3104149 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:3104070:3104149 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:3104070:3104149 [3] NCCL INFO comm 0xb81c6b50 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:2037085:2037085 
[3] NCCL INFO cudaDriverVersion 12010 +gpub053:2037085:2037085 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:2037085:2037085 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:2037085:2037162 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:2037085:2037162 [3] NCCL INFO Using network IB +gpub053:2037085:2037162 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub053:2037085:2037162 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub053:2037085:2037162 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub053:2037085:2037162 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub053:2037085:2037162 [3] NCCL INFO Connected all rings +gpub053:2037085:2037162 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub053:2037085:2037162 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub053:2037085:2037162 [3] NCCL INFO Connected all trees +gpub053:2037085:2037162 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:2037085:2037162 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:2037085:2037162 [3] NCCL INFO comm 0x5026aaa0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub050:2539553:2539553 [1] NCCL INFO cudaDriverVersion 12010 +gpub050:2539553:2539553 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539553:2539553 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539553:2539629 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2539553:2539629 [1] NCCL INFO Using network IB +gpub050:2539553:2539629 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub050:2539553:2539629 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub050:2539553:2539629 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub050:2539553:2539629 [1] NCCL INFO Connected all rings +gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub050:2539553:2539629 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub050:2539553:2539629 [1] NCCL INFO Connected all trees +gpub050:2539553:2539629 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539553:2539629 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539553:2539629 [1] NCCL INFO comm 0xa4859b0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub027:3834397:3834397 [1] NCCL INFO cudaDriverVersion 12010 +gpub027:3834397:3834397 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:3834397:3834397 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:3834397:3834475 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:3834397:3834475 [1] NCCL INFO Using network IB +gpub027:3834397:3834475 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub027:3834397:3834475 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub027:3834397:3834475 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via 
P2P/IPC +gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub027:3834397:3834475 [1] NCCL INFO Connected all rings +gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub027:3834397:3834475 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub027:3834397:3834475 [1] NCCL INFO Connected all trees +gpub027:3834397:3834475 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:3834397:3834475 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:3834397:3834475 [1] NCCL INFO comm 0x8ed34290 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub028:3104068:3104068 [1] NCCL INFO cudaDriverVersion 12010 +gpub028:3104068:3104068 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104068:3104068 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104068:3104151 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104068:3104151 [1] NCCL INFO Using network IB +gpub028:3104068:3104151 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub028:3104068:3104151 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub028:3104068:3104151 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub028:3104068:3104151 [1] NCCL INFO Connected all rings +gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub028:3104068:3104151 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub028:3104068:3104151 [1] NCCL INFO Connected all trees +gpub028:3104068:3104151 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:3104068:3104151 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:3104068:3104151 [1] NCCL INFO comm 0xb8c85b80 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub052:2277064:2277064 [2] NCCL INFO cudaDriverVersion 12010 +gpub052:2277064:2277064 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277064:2277064 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277064:2277141 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277064:2277141 [2] NCCL INFO Using network IB +gpub052:2277064:2277141 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub052:2277064:2277141 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub052:2277064:2277141 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub052:2277064:2277141 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub052:2277064:2277141 [2] NCCL INFO Connected all rings +gpub052:2277064:2277141 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub052:2277064:2277141 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub052:2277064:2277141 [2] NCCL INFO Connected all trees +gpub052:2277064:2277141 [2] NCCL INFO threadThresholds 8/8/64 | 
512/8/64 | 512 | 512 +gpub052:2277064:2277141 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277064:2277141 [2] NCCL INFO comm 0xa4d0c250 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:387634:387634 [1] NCCL INFO cudaDriverVersion 12010 +gpub078:387634:387634 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:387634:387634 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:387634:387712 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:387634:387712 [1] NCCL INFO Using network IB +gpub078:387634:387712 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub078:387634:387712 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:387634:387712 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:387634:387712 [1] NCCL INFO Connected all rings +gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:387634:387712 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:387634:387712 [1] NCCL INFO Connected all trees +gpub078:387634:387712 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:387634:387712 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:387634:387712 [1] NCCL INFO comm 0xb893bfd0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub051:3225329:3225329 [1] NCCL INFO cudaDriverVersion 12010 +gpub051:3225329:3225329 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225329:3225329 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3225329:3225407 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225329:3225407 [1] NCCL INFO Using network IB +gpub051:3225329:3225407 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub051:3225329:3225407 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub051:3225329:3225407 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub051:3225329:3225407 [1] NCCL INFO Connected all rings +gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub084:95630:95630 [1] NCCL INFO cudaDriverVersion 12010 +gpub084:95630:95630 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95630:95630 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95630:95713 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:95630:95713 [1] NCCL INFO Using network IB +gpub084:95630:95713 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub084:95630:95713 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub084:95630:95713 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub084:95630:95713 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub084:95630:95713 [1] NCCL INFO Connected all rings 
+gpub084:95630:95713 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub084:95630:95713 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub084:95630:95713 [1] NCCL INFO Connected all trees +gpub051:3225329:3225407 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub051:3225329:3225407 [1] NCCL INFO Connected all trees +gpub051:3225329:3225407 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225329:3225407 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225329:3225407 [1] NCCL INFO comm 0xa2b18990 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub084:95630:95713 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95630:95713 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95630:95713 [1] NCCL INFO comm 0x505266b0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub030:2867871:2867871 [2] NCCL INFO cudaDriverVersion 12010 +gpub030:2867871:2867871 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2867871:2867871 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2867871:2867947 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2867871:2867947 [2] NCCL INFO Using network IB +gpub030:2867871:2867947 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub030:2867871:2867947 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpub030:2867871:2867947 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub030:2867871:2867947 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub030:2867871:2867947 [2] NCCL INFO Connected all rings +gpub030:2867871:2867947 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub030:2867871:2867947 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub002:2108202:2108202 [3] NCCL INFO cudaDriverVersion 12010 +gpub002:2108202:2108202 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2108202:2108202 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2108202:2108274 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108202:2108274 [3] NCCL INFO Using network IB +gpub002:2108202:2108274 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub002:2108202:2108274 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub002:2108202:2108274 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub002:2108202:2108274 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub002:2108202:2108274 [3] NCCL INFO Connected all rings +gpub002:2108202:2108274 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub002:2108202:2108274 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub030:2867871:2867947 [2] NCCL INFO Connected all trees +gpub030:2867871:2867947 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2867871:2867947 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2867871:2867947 [2] NCCL INFO comm 0x516c8220 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub002:2108202:2108274 [3] NCCL INFO Connected all trees +gpub002:2108202:2108274 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108202:2108274 [3] NCCL 
INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2108202:2108274 [3] NCCL INFO comm 0xba66c350 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub028:3104069:3104069 [2] NCCL INFO cudaDriverVersion 12010 +gpub028:3104069:3104069 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104069:3104069 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104069:3104150 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104069:3104150 [2] NCCL INFO Using network IB +gpub028:3104069:3104150 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub028:3104069:3104150 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub028:3104069:3104150 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub028:3104069:3104150 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub028:3104069:3104150 [2] NCCL INFO Connected all rings +gpub028:3104069:3104150 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub028:3104069:3104150 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub028:3104069:3104150 [2] NCCL INFO Connected all trees +gpub028:3104069:3104150 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:3104069:3104150 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:3104069:3104150 [2] NCCL INFO comm 0x50c3cd20 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub084:95629:95629 [0] NCCL INFO cudaDriverVersion 12010 +gpub084:95629:95629 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95629:95629 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95629:95715 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:95629:95715 [0] NCCL INFO Using network IB +gpub084:95629:95715 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub084:95629:95715 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub084:95629:95715 [0] NCCL INFO Connected all rings +gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Connected all trees +gpub084:95629:95715 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95629:95715 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95629:95715 [0] NCCL INFO comm 0x4f579950 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:2539555:2539555 [3] NCCL INFO cudaDriverVersion 12010 +gpub050:2539555:2539555 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539555:2539555 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539555:2539630 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB 
eth1:172.28.23.150<0> +gpub050:2539555:2539630 [3] NCCL INFO Using network IB +gpub050:2539555:2539630 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub050:2539555:2539630 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub050:2539555:2539630 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub050:2539555:2539630 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub050:2539555:2539630 [3] NCCL INFO Connected all rings +gpub050:2539555:2539630 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub050:2539555:2539630 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub050:2539555:2539630 [3] NCCL INFO Connected all trees +gpub050:2539555:2539630 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539555:2539630 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539555:2539630 [3] NCCL INFO comm 0xb939ca50 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:2277062:2277062 [0] NCCL INFO cudaDriverVersion 12010 +gpub052:2277062:2277062 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277062:2277062 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277062:2277138 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277062:2277138 [0] NCCL INFO Using network IB +gpub052:2277062:2277138 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub052:2277062:2277138 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub052:2277062:2277138 [0] NCCL INFO Connected all rings +gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Connected all trees +gpub052:2277062:2277138 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277062:2277138 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277062:2277138 [0] NCCL INFO comm 0x8b3e450 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub010:1746408:1746408 [1] NCCL INFO cudaDriverVersion 12010 +gpub010:1746408:1746408 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1746408:1746408 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1746408:1746484 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0> +gpub010:1746408:1746484 [1] NCCL INFO Using network IB +gpub010:1746408:1746484 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub010:1746408:1746484 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 
10/-1/-1->9->8 +gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub010:1746408:1746484 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub010:1746408:1746484 [1] NCCL INFO Connected all rings +gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub010:1746408:1746484 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub010:1746408:1746484 [1] NCCL INFO Connected all trees +gpub010:1746408:1746484 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub010:1746408:1746484 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1746408:1746484 [1] NCCL INFO comm 0xab889a50 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub010:1746409:1746409 [2] NCCL INFO cudaDriverVersion 12010 +gpub010:1746409:1746409 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1746409:1746409 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1746409:1746487 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0> +gpub010:1746409:1746487 [2] NCCL INFO Using network IB +gpub010:1746409:1746487 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub010:1746409:1746487 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub010:1746409:1746487 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub010:1746409:1746487 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub010:1746409:1746487 [2] NCCL INFO Connected all rings +gpub010:1746409:1746487 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub010:1746409:1746487 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub010:1746409:1746487 [2] NCCL INFO Connected all trees +gpub010:1746409:1746487 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub010:1746409:1746487 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1746409:1746487 [2] NCCL INFO comm 0x8d5443e0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub002:2108200:2108200 [1] NCCL INFO cudaDriverVersion 12010 +gpub002:2108200:2108200 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2108200:2108200 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2108200:2108275 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108200:2108275 [1] NCCL INFO Using network IB +gpub002:2108200:2108275 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub002:2108200:2108275 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub002:2108200:2108275 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub002:2108200:2108275 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub002:2108200:2108275 [1] NCCL INFO Connected all rings +gpub002:2108200:2108275 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub002:2108200:2108275 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub002:2108200:2108275 [1] NCCL INFO Connected all trees +gpub002:2108200:2108275 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108200:2108275 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
+gpub002:2108200:2108275 [1] NCCL INFO comm 0x8e8ce8d0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub002:2108199:2108273 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108199:2108273 [0] NCCL INFO Using network IB +gpub002:2108199:2108273 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub002:2108199:2108273 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub002:2108199:2108273 [0] NCCL INFO Connected all rings +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Connected all trees +gpub002:2108199:2108273 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108199:2108273 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2108199:2108273 [0] NCCL INFO comm 0x8d0b120 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub073:748600:748600 [3] NCCL INFO cudaDriverVersion 12010 +gpub073:748600:748600 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:748600:748600 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:748600:748671 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:748600:748671 [3] NCCL INFO Using network IB +gpub073:748600:748671 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub073:748600:748671 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub073:748600:748671 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub073:748600:748671 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub073:748600:748671 [3] NCCL INFO Connected all rings +gpub073:748600:748671 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub073:748600:748671 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub051:3225328:3225328 [0] NCCL INFO cudaDriverVersion 12010 +gpub051:3225328:3225328 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225328:3225328 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3225328:3225405 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225328:3225405 [0] NCCL INFO Using network IB +gpub051:3225328:3225405 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub051:3225328:3225405 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] 
[receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub051:3225328:3225405 [0] NCCL INFO Connected all rings +gpub073:748600:748671 [3] NCCL INFO Connected all trees +gpub073:748600:748671 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748600:748671 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748600:748671 [3] NCCL INFO comm 0x4f8ebf60 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Connected all trees +gpub051:3225328:3225405 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225328:3225405 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225328:3225405 [0] NCCL INFO comm 0x4f680190 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:2539554:2539554 [2] NCCL INFO cudaDriverVersion 12010 +gpub050:2539554:2539554 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539554:2539554 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539554:2539627 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2539554:2539627 [2] NCCL INFO Using network IB +gpub050:2539554:2539627 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub050:2539554:2539627 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub050:2539554:2539627 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub050:2539554:2539627 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub050:2539554:2539627 [2] NCCL INFO Connected all rings +gpub050:2539554:2539627 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub050:2539554:2539627 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub050:2539554:2539627 [2] NCCL INFO Connected all trees +gpub050:2539554:2539627 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539554:2539627 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539554:2539627 [2] NCCL INFO comm 0xa469b710 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub027:3834398:3834398 [2] NCCL INFO cudaDriverVersion 12010 +gpub027:3834398:3834398 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:3834398:3834398 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:3834398:3834473 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:3834398:3834473 [2] NCCL INFO Using network IB +gpub027:3834398:3834473 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub027:3834398:3834473 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub027:3834398:3834473 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC 
+gpub027:3834398:3834473 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub027:3834398:3834473 [2] NCCL INFO Connected all rings +gpub027:3834398:3834473 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub027:3834398:3834473 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub027:3834398:3834473 [2] NCCL INFO Connected all trees +gpub027:3834398:3834473 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:3834398:3834473 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:3834398:3834473 [2] NCCL INFO comm 0x505e2640 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub073:748598:748598 [1] NCCL INFO cudaDriverVersion 12010 +gpub073:748598:748598 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:748598:748598 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:748598:748673 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:748598:748673 [1] NCCL INFO Using network IB +gpub073:748598:748673 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub073:748598:748673 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub073:748598:748673 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub073:748598:748673 [1] NCCL INFO Connected all rings +gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub073:748598:748673 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub073:748598:748673 [1] NCCL INFO Connected all trees +gpub073:748598:748673 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748598:748673 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748598:748673 [1] NCCL INFO comm 0xb7883d00 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub073:748597:748597 [0] NCCL INFO cudaDriverVersion 12010 +gpub073:748597:748597 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:748597:748597 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:748597:748674 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:748597:748674 [0] NCCL INFO Using network IB +gpub073:748597:748674 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub073:748597:748674 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub073:748597:748674 [0] NCCL INFO Connected all rings +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via 
NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Connected all trees +gpub073:748597:748674 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748597:748674 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748597:748674 [0] NCCL INFO comm 0xa03dfc0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:2539552:2539552 [0] NCCL INFO cudaDriverVersion 12010 +gpub050:2539552:2539552 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539552:2539552 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539552:2539628 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2539552:2539628 [0] NCCL INFO Using network IB +gpub050:2539552:2539628 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub050:2539552:2539628 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub050:2539552:2539628 [0] NCCL INFO Connected all rings +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Connected all trees +gpub050:2539552:2539628 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539552:2539628 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539552:2539628 [0] NCCL INFO comm 0xaafdc050 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub052:2277063:2277063 [1] NCCL INFO cudaDriverVersion 12010 +gpub052:2277063:2277063 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277063:2277063 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277063:2277140 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277063:2277140 [1] NCCL INFO Using network IB +gpub052:2277063:2277140 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub052:2277063:2277140 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Connected all rings +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub052:2277063:2277140 [1] NCCL INFO 
Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Connected all trees +gpub052:2277063:2277140 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277063:2277140 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277063:2277140 [1] NCCL INFO comm 0xa865590 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2789794:2789794 [1] NCCL INFO cudaDriverVersion 12010 +gpub008:2789794:2789794 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789794:2789794 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789794:2789873 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789794:2789873 [1] NCCL INFO Using network IB +gpub008:2789794:2789873 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub008:2789794:2789873 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub008:2789794:2789873 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Connected all rings +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub008:2789794:2789873 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Connected all trees +gpub008:2789794:2789873 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789794:2789873 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789794:2789873 [1] NCCL INFO comm 0x8abbf8b0 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub002:2108201:2108201 [2] NCCL INFO cudaDriverVersion 12010 +gpub002:2108201:2108201 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2108201:2108201 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2108201:2108276 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108201:2108276 [2] NCCL INFO Using network IB +gpub002:2108201:2108276 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub002:2108201:2108276 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub002:2108201:2108276 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Connected all rings +gpub002:2108201:2108276 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Connected all trees +gpub002:2108201:2108276 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108201:2108276 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2108201:2108276 [2] NCCL INFO comm 0x8ca2cb90 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3225330:3225330 [2] NCCL INFO cudaDriverVersion 12010 +gpub051:3225330:3225330 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225330:3225330 [2] NCCL INFO NET/Plugin : No plugin 
found (libnccl-net.so), using internal implementation +gpub051:3225330:3225408 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225330:3225408 [2] NCCL INFO Using network IB +gpub051:3225330:3225408 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub051:3225330:3225408 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub051:3225330:3225408 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Connected all rings +gpub051:3225330:3225408 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Connected all trees +gpub051:3225330:3225408 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225330:3225408 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225330:3225408 [2] NCCL INFO comm 0x4f59a920 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3225331:3225331 [3] NCCL INFO cudaDriverVersion 12010 +gpub051:3225331:3225331 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225331:3225331 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3225331:3225406 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225331:3225406 [3] NCCL INFO Using network IB +gpub051:3225331:3225406 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub051:3225331:3225406 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub051:3225331:3225406 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225331:3225406 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225331:3225406 [3] NCCL INFO Connected all rings +gpub051:3225331:3225406 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub051:3225331:3225406 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub051:3225331:3225406 [3] NCCL INFO Connected all trees +gpub051:3225331:3225406 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225331:3225406 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225331:3225406 [3] NCCL INFO comm 0xb371b610 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:2277065:2277065 [3] NCCL INFO cudaDriverVersion 12010 +gpub052:2277065:2277065 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277065:2277065 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277065:2277139 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277065:2277139 [3] NCCL INFO Using network IB +gpub052:2277065:2277139 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub052:2277065:2277139 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub052:2277065:2277139 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277065:2277139 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277065:2277139 [3] NCCL INFO Connected all rings +gpub052:2277065:2277139 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub052:2277065:2277139 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub052:2277065:2277139 [3] NCCL INFO 
Connected all trees +gpub052:2277065:2277139 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277065:2277139 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277065:2277139 [3] NCCL INFO comm 0x8f38890 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance.
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
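The warning above is emitted once per rank by PyTorch's DDP reducer. For reference, a minimal sketch of where the flag lives; this is generic PyTorch usage, not the ESPnet trainer code, and `wrap_model`/`gpu_id` are illustrative names:

```python
# Minimal sketch (generic PyTorch, not the ESPnet trainer itself) of the
# flag the warning refers to. find_unused_parameters=True makes DDP walk
# the autograd graph after every forward pass to look for parameters that
# did not contribute; setting it to False removes that per-iteration
# traversal, but requires every parameter to get a gradient each step.
import torch
from torch.nn.parallel import DistributedDataParallel as DDP

def wrap_model(model: torch.nn.Module, gpu_id: int) -> DDP:
    # Keep find_unused_parameters=True only if some branches of the model
    # (e.g. an optional head) can be skipped on some iterations.
    return DDP(
        model.cuda(gpu_id),
        device_ids=[gpu_id],
        find_unused_parameters=False,
    )
```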
+[gpub002:0/64] 2023-07-12 13:27:01,090 (trainer:732) INFO: 40epoch:train:1-100batch: iter_time=1.208, forward_time=0.235, loss_ctc=61.116, loss_att=44.050, acc=0.697, loss=49.170, backward_time=1.036, grad_norm=103.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.735e-05, train_time=8.676 +[gpub002:0/64] 2023-07-12 13:29:17,100 (trainer:732) INFO: 40epoch:train:101-200batch: iter_time=1.264e-04, forward_time=0.142, loss_ctc=72.135, loss_att=57.235, acc=0.700, loss=61.705, backward_time=1.027, grad_norm=107.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.734e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 13:31:32,346 (trainer:732) INFO: 40epoch:train:201-300batch: iter_time=1.291e-04, forward_time=0.142, loss_ctc=81.043, loss_att=57.325, acc=0.717, loss=64.440, backward_time=1.025, grad_norm=163.403, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.734e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 13:33:47,706 (trainer:732) INFO: 40epoch:train:301-400batch: iter_time=1.172e-04, forward_time=0.143, loss_ctc=71.829, loss_att=55.277, acc=0.697, loss=60.243, backward_time=1.026, grad_norm=110.550, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.733e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 13:36:02,885 (trainer:732) INFO: 40epoch:train:401-500batch: iter_time=1.285e-04, forward_time=0.143, loss_ctc=72.347, loss_att=53.215, acc=0.711, loss=58.954, backward_time=1.025, grad_norm=113.868, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.732e-05, train_time=2.703 +[gpub002:0/64] 2023-07-12 13:38:27,982 (trainer:732) INFO: 40epoch:train:501-600batch: iter_time=1.282e-04, forward_time=0.141, loss_ctc=68.400, loss_att=46.292, acc=0.689, loss=52.924, backward_time=1.032, grad_norm=117.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.731e-05, train_time=2.902 +[gpub002:0/64] 2023-07-12 13:40:47,434 (trainer:732) INFO: 40epoch:train:601-700batch: iter_time=1.231e-04, forward_time=0.142, loss_ctc=68.083, loss_att=50.252, acc=0.715, loss=55.601, backward_time=1.028, grad_norm=108.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.731e-05, train_time=2.789 +[gpub002:0/64] 2023-07-12 13:43:10,107 (trainer:732) INFO: 40epoch:train:701-800batch: iter_time=1.157e-04, forward_time=0.142, loss_ctc=68.676, loss_att=52.334, acc=0.698, loss=57.237, backward_time=1.024, grad_norm=108.949, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.730e-05, train_time=2.852 +[gpub002:0/64] 2023-07-12 13:44:08,310 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
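The per-100-batch `loss` in these entries is consistent with a hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3. The weight is inferred here from the logged values; the actual setting lives in the training YAML, which is not part of this log. A quick check against the first entry:

```python
# Assumes ctc_weight = 0.3 (inferred from the logged numbers above; the
# real value comes from the training config, not shown in this log).
ctc_weight = 0.3

def combined_loss(loss_ctc: float, loss_att: float) -> float:
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# 40epoch:train:1-100batch: loss_ctc=61.116, loss_att=44.050, loss=49.170
assert abs(combined_loss(61.116, 44.050) - 49.170) < 1e-3
```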
+[gpub002:0/64] 2023-07-12 13:44:25,786 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 13:44:29,170 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 13:44:29,170 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-12 13:44:29,176 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 13:49:29,871 (trainer:732) INFO: 40epoch:train:801-900batch: iter_time=2.128, forward_time=0.187, loss_ctc=62.142, loss_att=45.987, acc=0.699, loss=50.833, backward_time=1.043, grad_norm=127.878, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.729e-05, train_time=7.596 +[gpub002:0/64] 2023-07-12 13:51:45,925 (trainer:732) INFO: 40epoch:train:901-1000batch: iter_time=1.253e-04, forward_time=0.144, loss_ctc=73.026, loss_att=58.214, acc=0.710, loss=62.657, backward_time=1.025, grad_norm=124.501, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.728e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 13:54:01,587 (trainer:732) INFO: 40epoch:train:1001-1100batch: iter_time=1.277e-04, forward_time=0.143, loss_ctc=79.141, loss_att=57.052, acc=0.725, loss=63.679, backward_time=1.025, grad_norm=151.133, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.727e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 13:56:17,421 (trainer:732) INFO: 40epoch:train:1101-1200batch: iter_time=1.301e-04, forward_time=0.143, loss_ctc=71.202, loss_att=55.122, acc=0.705, loss=59.946, backward_time=1.026, grad_norm=97.471, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.727e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 13:58:33,114 (trainer:732) INFO: 40epoch:train:1201-1300batch: iter_time=1.240e-04, forward_time=0.143, loss_ctc=71.459, loss_att=52.133, acc=0.724, loss=57.931, backward_time=1.025, grad_norm=140.565, clip=100.000, loss_scale=5.192e+32, optim_step_time=0.179, optim0_lr0=5.726e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 14:00:48,389 (trainer:732) INFO: 40epoch:train:1301-1400batch: iter_time=1.241e-04, forward_time=0.143, loss_ctc=65.305, loss_att=45.866, acc=0.695, loss=51.698, backward_time=1.021, grad_norm=108.278, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.725e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 14:03:05,877 (trainer:732) INFO: 40epoch:train:1401-1500batch: iter_time=1.239e-04, forward_time=0.142, loss_ctc=68.643, loss_att=50.860, acc=0.714, loss=56.195, backward_time=1.025, grad_norm=114.462, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.724e-05, train_time=2.750 +[gpub002:0/64] 2023-07-12 14:05:24,000 (trainer:732) INFO: 40epoch:train:1501-1600batch: iter_time=1.178e-04, forward_time=0.143, loss_ctc=67.303, loss_att=51.300, acc=0.712, loss=56.101, backward_time=1.026, grad_norm=99.620, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.724e-05, 
train_time=2.762 +[gpub002:0/64] 2023-07-12 14:06:37,008 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 14:06:56,326 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-12 14:07:14,096 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 14:07:17,513 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 14:07:17,513 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-12 14:07:17,519 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 14:12:18,848 (trainer:732) INFO: 40epoch:train:1601-1700batch: iter_time=1.255, forward_time=0.143, loss_ctc=66.208, loss_att=50.001, acc=0.701, loss=54.863, backward_time=1.030, grad_norm=128.956, clip=100.000, loss_scale=4.967e+32, optim_step_time=0.179, optim0_lr0=5.723e-05, train_time=8.297 +[gpub002:0/64] 2023-07-12 14:14:35,280 (trainer:732) INFO: 40epoch:train:1701-1800batch: iter_time=1.084e-04, forward_time=0.143, loss_ctc=67.632, loss_att=51.671, acc=0.709, loss=56.460, backward_time=1.028, grad_norm=131.230, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.722e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 14:16:51,826 (trainer:732) INFO: 40epoch:train:1801-1900batch: iter_time=1.260e-04, forward_time=0.145, loss_ctc=77.856, loss_att=57.061, acc=0.718, loss=63.299, backward_time=1.031, grad_norm=127.495, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.722e-05, train_time=2.731 +[gpub002:0/64] 2023-07-12 14:19:07,735 (trainer:732) INFO: 40epoch:train:1901-2000batch: iter_time=1.083e-04, forward_time=0.143, loss_ctc=74.187, loss_att=54.012, acc=0.723, loss=60.065, backward_time=1.029, grad_norm=134.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.721e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 14:21:23,408 (trainer:732) INFO: 40epoch:train:2001-2100batch: iter_time=1.295e-04, forward_time=0.143, loss_ctc=71.655, loss_att=56.017, acc=0.709, loss=60.708, backward_time=1.025, grad_norm=113.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.720e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 14:23:38,786 (trainer:732) INFO: 40epoch:train:2101-2200batch: iter_time=1.211e-04, forward_time=0.143, loss_ctc=68.732, loss_att=48.850, acc=0.703, loss=54.815, backward_time=1.022, grad_norm=114.122, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.719e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 14:25:54,333 (trainer:732) INFO: 40epoch:train:2201-2300batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=63.319, loss_att=45.815, acc=0.718, loss=51.066, backward_time=1.024, grad_norm=112.742, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.719e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 14:28:11,996 (trainer:732) INFO: 
40epoch:train:2301-2400batch: iter_time=1.254e-04, forward_time=0.143, loss_ctc=66.195, loss_att=50.742, acc=0.715, loss=55.378, backward_time=1.023, grad_norm=102.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.718e-05, train_time=2.753 +[gpub002:0/64] 2023-07-12 14:30:30,321 (trainer:732) INFO: 40epoch:train:2401-2500batch: iter_time=1.265e-04, forward_time=0.143, loss_ctc=71.093, loss_att=53.758, acc=0.706, loss=58.959, backward_time=1.027, grad_norm=115.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.717e-05, train_time=2.766 +[gpub002:0/64] 2023-07-12 14:30:32,911 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-12 14:30:50,993 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 14:30:54,403 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 14:30:54,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-12 14:30:54,409 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 14:37:48,647 (trainer:732) INFO: 40epoch:train:2501-2600batch: iter_time=1.257, forward_time=0.143, loss_ctc=61.405, loss_att=46.625, acc=0.694, loss=51.059, backward_time=1.033, grad_norm=109.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.716e-05, train_time=8.766 +[gpub002:0/64] 2023-07-12 14:40:05,295 (trainer:732) INFO: 40epoch:train:2601-2700batch: iter_time=1.359e-04, forward_time=0.144, loss_ctc=69.964, loss_att=53.049, acc=0.717, loss=58.124, backward_time=1.027, grad_norm=135.636, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.716e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 14:42:21,035 (trainer:732) INFO: 40epoch:train:2701-2800batch: iter_time=1.229e-04, forward_time=0.144, loss_ctc=77.512, loss_att=56.118, acc=0.709, loss=62.537, backward_time=1.025, grad_norm=137.980, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.715e-05, train_time=2.715 +[gpub002:0/64] 2023-07-12 14:44:36,479 (trainer:732) INFO: 40epoch:train:2801-2900batch: iter_time=1.048e-04, forward_time=0.142, loss_ctc=71.235, loss_att=56.169, acc=0.706, loss=60.689, backward_time=1.023, grad_norm=120.061, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.714e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 14:46:51,759 (trainer:732) INFO: 40epoch:train:2901-3000batch: iter_time=1.043e-04, forward_time=0.143, loss_ctc=68.448, loss_att=51.921, acc=0.704, loss=56.879, backward_time=1.022, grad_norm=104.401, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.713e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 14:49:07,671 (trainer:732) INFO: 40epoch:train:3001-3100batch: iter_time=1.139e-04, forward_time=0.142, loss_ctc=66.825, loss_att=44.975, acc=0.706, loss=51.530, backward_time=1.021, grad_norm=103.993, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.713e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 14:51:28,934 (trainer:732) INFO: 40epoch:train:3101-3200batch: iter_time=1.132e-04, forward_time=0.142, loss_ctc=65.918, loss_att=48.901, acc=0.716, loss=54.006, backward_time=1.027, grad_norm=102.075, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.712e-05, train_time=2.825 +[gpub002:0/64] 2023-07-12 14:53:49,088 (trainer:732) INFO: 40epoch:train:3201-3300batch: iter_time=1.065e-04, forward_time=0.142, loss_ctc=69.493, loss_att=54.998, acc=0.698, loss=59.346, backward_time=1.045, grad_norm=117.513, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.711e-05, train_time=2.803 +[gpub002:0/64] 2023-07-12 14:54:39,670 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-12 14:54:57,543 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 14:55:01,100 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 14:55:01,100 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-12 14:55:01,106 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:00:02,934 (trainer:732) INFO: 40epoch:train:3301-3400batch: iter_time=1.259, forward_time=0.143, loss_ctc=63.620, loss_att=45.338, acc=0.714, loss=50.822, backward_time=1.049, grad_norm=109.621, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.710e-05, train_time=7.477 +[gpub002:0/64] 2023-07-12 15:02:20,429 (trainer:732) INFO: 40epoch:train:3401-3500batch: iter_time=1.191e-04, forward_time=0.143, loss_ctc=66.528, loss_att=49.949, acc=0.714, loss=54.922, backward_time=1.026, grad_norm=125.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.710e-05, train_time=2.750 +[gpub002:0/64] 2023-07-12 15:04:36,303 (trainer:732) INFO: 40epoch:train:3501-3600batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=76.092, loss_att=55.412, acc=0.724, loss=61.616, backward_time=1.026, grad_norm=122.114, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.709e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 15:06:52,169 (trainer:732) INFO: 40epoch:train:3601-3700batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=72.680, loss_att=53.035, acc=0.724, loss=58.929, backward_time=1.025, grad_norm=97.254, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.708e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 15:09:07,796 (trainer:732) INFO: 40epoch:train:3701-3800batch: iter_time=1.156e-04, forward_time=0.143, loss_ctc=69.121, loss_att=53.328, acc=0.720, loss=58.066, backward_time=1.024, grad_norm=139.481, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.707e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 15:11:23,110 (trainer:732) INFO: 40epoch:train:3801-3900batch: iter_time=1.205e-04, 
forward_time=0.143, loss_ctc=69.056, loss_att=47.903, acc=0.705, loss=54.249, backward_time=1.021, grad_norm=121.204, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.707e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 15:13:40,798 (trainer:732) INFO: 40epoch:train:3901-4000batch: iter_time=1.128e-04, forward_time=0.143, loss_ctc=65.887, loss_att=48.895, acc=0.717, loss=53.992, backward_time=1.027, grad_norm=104.050, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.706e-05, train_time=2.754 +[gpub002:0/64] 2023-07-12 15:16:02,209 (trainer:732) INFO: 40epoch:train:4001-4100batch: iter_time=1.186e-04, forward_time=0.143, loss_ctc=66.041, loss_att=51.439, acc=0.707, loss=55.819, backward_time=1.028, grad_norm=112.561, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.705e-05, train_time=2.828 +[gpub002:0/64] 2023-07-12 15:17:39,601 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-12 15:17:57,438 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 15:18:00,971 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 15:18:00,971 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-12 15:18:00,978 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:21:47,365 (trainer:732) INFO: 40epoch:train:4101-4200batch: iter_time=1.250, forward_time=0.144, loss_ctc=68.950, loss_att=50.746, acc=0.718, loss=56.207, backward_time=1.041, grad_norm=106.172, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.704e-05, train_time=6.903 +[gpub002:0/64] 2023-07-12 15:24:03,354 (trainer:732) INFO: 40epoch:train:4201-4300batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=62.707, loss_att=48.487, acc=0.702, loss=52.753, backward_time=1.026, grad_norm=100.763, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.704e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 15:26:19,013 (trainer:732) INFO: 40epoch:train:4301-4400batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=69.760, loss_att=52.370, acc=0.725, loss=57.587, backward_time=1.023, grad_norm=106.670, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.703e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 15:28:34,645 (trainer:732) INFO: 40epoch:train:4401-4500batch: iter_time=1.100e-04, forward_time=0.143, loss_ctc=78.815, loss_att=57.161, acc=0.718, loss=63.657, backward_time=1.024, grad_norm=126.050, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.702e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 15:30:50,128 (trainer:732) INFO: 40epoch:train:4501-4600batch: iter_time=1.185e-04, forward_time=0.143, loss_ctc=67.843, loss_att=54.104, acc=0.716, loss=58.226, backward_time=1.023, grad_norm=113.696, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.701e-05, 
train_time=2.709 +[gpub002:0/64] 2023-07-12 15:33:05,671 (trainer:732) INFO: 40epoch:train:4601-4700batch: iter_time=1.154e-04, forward_time=0.143, loss_ctc=70.374, loss_att=50.230, acc=0.723, loss=56.273, backward_time=1.023, grad_norm=118.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.701e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 15:35:20,996 (trainer:732) INFO: 40epoch:train:4701-4800batch: iter_time=1.161e-04, forward_time=0.143, loss_ctc=64.594, loss_att=44.965, acc=0.707, loss=50.854, backward_time=1.022, grad_norm=106.973, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.700e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 15:37:36,540 (trainer:732) INFO: 40epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.143, loss_ctc=67.322, loss_att=50.435, acc=0.717, loss=55.501, backward_time=1.024, grad_norm=103.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.699e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 15:39:52,011 (trainer:732) INFO: 40epoch:train:4901-5000batch: iter_time=1.164e-04, forward_time=0.143, loss_ctc=70.025, loss_att=54.199, acc=0.712, loss=58.947, backward_time=1.023, grad_norm=113.323, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.698e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 15:39:54,676 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-12 15:40:13,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 15:40:16,430 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 15:40:16,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-12 15:40:16,437 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:45:20,743 (trainer:732) INFO: 40epoch:train:5001-5100batch: iter_time=1.263, forward_time=0.180, loss_ctc=60.084, loss_att=45.552, acc=0.702, loss=49.912, backward_time=1.033, grad_norm=102.864, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.698e-05, train_time=6.574 +[gpub002:0/64] 2023-07-12 15:47:37,331 (trainer:732) INFO: 40epoch:train:5101-5200batch: iter_time=1.241e-04, forward_time=0.144, loss_ctc=70.511, loss_att=52.939, acc=0.720, loss=58.210, backward_time=1.024, grad_norm=103.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.697e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 15:49:58,905 (trainer:732) INFO: 40epoch:train:5201-5300batch: iter_time=1.112e-04, forward_time=0.142, loss_ctc=77.354, loss_att=56.014, acc=0.710, loss=62.416, backward_time=1.023, grad_norm=133.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.696e-05, train_time=2.831 +[gpub002:0/64] 2023-07-12 15:52:14,432 (trainer:732) INFO: 40epoch:train:5301-5400batch: iter_time=1.243e-04, forward_time=0.143, loss_ctc=68.506, loss_att=54.652, acc=0.711, 
loss=58.808, backward_time=1.025, grad_norm=114.787, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.695e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 15:54:29,792 (trainer:732) INFO: 40epoch:train:5401-5500batch: iter_time=1.084e-04, forward_time=0.143, loss_ctc=68.380, loss_att=50.824, acc=0.707, loss=56.091, backward_time=1.023, grad_norm=186.761, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.695e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 15:56:44,731 (trainer:732) INFO: 40epoch:train:5501-5600batch: iter_time=1.176e-04, forward_time=0.142, loss_ctc=65.993, loss_att=45.437, acc=0.706, loss=51.604, backward_time=1.019, grad_norm=104.703, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.694e-05, train_time=2.699 +[gpub002:0/64] 2023-07-12 15:58:59,893 (trainer:732) INFO: 40epoch:train:5601-5700batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=65.646, loss_att=48.820, acc=0.718, loss=53.868, backward_time=1.022, grad_norm=121.882, clip=100.000, loss_scale=4.738e+32, optim_step_time=0.180, optim0_lr0=5.693e-05, train_time=2.703 +[gpub002:0/64] 2023-07-12 16:01:15,393 (trainer:732) INFO: 40epoch:train:5701-5800batch: iter_time=1.205e-04, forward_time=0.143, loss_ctc=69.173, loss_att=55.052, acc=0.701, loss=59.288, backward_time=1.025, grad_norm=113.446, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.693e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 16:02:13,718 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-12 16:02:31,777 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:02:35,214 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:02:35,215 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 16:02:35,223 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:06:51,419 (trainer:732) INFO: 40epoch:train:5801-5900batch: iter_time=1.911, forward_time=0.144, loss_ctc=61.992, loss_att=46.321, acc=0.712, loss=51.022, backward_time=1.033, grad_norm=119.027, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.692e-05, train_time=6.720 +[gpub002:0/64] 2023-07-12 16:09:07,218 (trainer:732) INFO: 40epoch:train:5901-6000batch: iter_time=1.335e-04, forward_time=0.143, loss_ctc=66.798, loss_att=49.972, acc=0.708, loss=55.019, backward_time=1.023, grad_norm=108.107, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.691e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 16:11:22,903 (trainer:732) INFO: 40epoch:train:6001-6100batch: iter_time=1.237e-04, forward_time=0.143, loss_ctc=76.895, loss_att=56.337, acc=0.716, loss=62.504, backward_time=1.024, grad_norm=192.818, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.690e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 16:13:38,302 
(trainer:732) INFO: 40epoch:train:6101-6200batch: iter_time=1.439e-04, forward_time=0.144, loss_ctc=73.992, loss_att=52.793, acc=0.718, loss=59.152, backward_time=1.022, grad_norm=97.949, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.690e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 16:15:53,887 (trainer:732) INFO: 40epoch:train:6201-6300batch: iter_time=1.375e-04, forward_time=0.144, loss_ctc=69.085, loss_att=53.781, acc=0.709, loss=58.372, backward_time=1.024, grad_norm=107.327, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.689e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:18:09,295 (trainer:732) INFO: 40epoch:train:6301-6400batch: iter_time=1.276e-04, forward_time=0.145, loss_ctc=67.794, loss_att=47.222, acc=0.705, loss=53.394, backward_time=1.023, grad_norm=105.215, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.688e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 16:20:24,498 (trainer:732) INFO: 40epoch:train:6401-6500batch: iter_time=1.279e-04, forward_time=0.145, loss_ctc=65.690, loss_att=47.722, acc=0.718, loss=53.112, backward_time=1.023, grad_norm=125.378, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.687e-05, train_time=2.704 +[gpub002:0/64] 2023-07-12 16:22:12,380 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 16:22:39,598 (trainer:732) INFO: 40epoch:train:6501-6600batch: iter_time=1.096e-04, forward_time=0.144, loss_ctc=64.236, loss_att=49.951, acc=0.705, loss=54.237, backward_time=1.023, grad_norm=93.202, clip=100.000, loss_scale=5.828e+32, optim_step_time=0.180, optim0_lr0=5.687e-05, train_time=2.702 +[gpub002:0/64] 2023-07-12 16:24:14,970 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
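The "grad norm is nan. Skipping updating the model." warnings (14:06:37 and 16:22:12 above) are the usual mixed-precision guard: when the dynamic loss scale (`loss_scale` in these entries) overflows, gradients come out non-finite and the optimizer step is skipped instead of applied. A minimal sketch of that pattern, assuming the logged clip threshold of 100 (`clip=100.000`); this is not the espnet2 trainer code itself:

```python
import torch

def clip_and_step(model: torch.nn.Module,
                  optimizer: torch.optim.Optimizer,
                  max_norm: float = 100.0) -> bool:
    """Clip gradients and step; skip the update when the norm is non-finite."""
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    if not torch.isfinite(grad_norm):
        # Mirrors the logged behavior: the model is left untouched for
        # this batch and training continues with the next one.
        optimizer.zero_grad()
        return False
    optimizer.step()
    optimizer.zero_grad()
    return True
```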
+[gpub002:0/64] 2023-07-12 16:24:32,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:24:36,515 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:24:36,515 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-12 16:24:36,521 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:28:25,470 (trainer:732) INFO: 40epoch:train:6601-6700batch: iter_time=1.255, forward_time=0.145, loss_ctc=68.337, loss_att=49.507, acc=0.724, loss=55.156, backward_time=1.038, grad_norm=116.207, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.686e-05, train_time=6.917 +[gpub002:0/64] 2023-07-12 16:30:42,084 (trainer:732) INFO: 40epoch:train:6701-6800batch: iter_time=1.329e-04, forward_time=0.145, loss_ctc=62.314, loss_att=47.201, acc=0.706, loss=51.735, backward_time=1.025, grad_norm=98.754, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.685e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 16:32:57,908 (trainer:732) INFO: 40epoch:train:6801-6900batch: iter_time=1.346e-04, forward_time=0.145, loss_ctc=68.481, loss_att=52.287, acc=0.727, loss=57.145, backward_time=1.026, grad_norm=109.277, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.684e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 16:35:13,486 (trainer:732) INFO: 40epoch:train:6901-7000batch: iter_time=1.281e-04, forward_time=0.144, loss_ctc=79.843, loss_att=57.285, acc=0.717, loss=64.053, backward_time=1.026, grad_norm=130.573, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.684e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:37:29,332 (trainer:732) INFO: 40epoch:train:7001-7100batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=67.417, loss_att=53.169, acc=0.721, loss=57.444, backward_time=1.027, grad_norm=147.307, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.683e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 16:39:44,891 (trainer:732) INFO: 40epoch:train:7101-7200batch: iter_time=1.272e-04, forward_time=0.145, loss_ctc=70.180, loss_att=50.528, acc=0.727, loss=56.424, backward_time=1.024, grad_norm=107.531, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.682e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:42:05,273 (trainer:732) INFO: 40epoch:train:7201-7300batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=63.893, loss_att=44.742, acc=0.708, loss=50.488, backward_time=1.023, grad_norm=103.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.681e-05, train_time=2.807 +[gpub002:0/64] 2023-07-12 16:44:20,746 (trainer:732) INFO: 40epoch:train:7301-7400batch: iter_time=1.136e-04, forward_time=0.143, loss_ctc=67.728, loss_att=50.827, acc=0.719, loss=55.897, backward_time=1.024, grad_norm=118.243, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, 
optim0_lr0=5.681e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 16:46:36,313 (trainer:732) INFO: 40epoch:train:7401-7500batch: iter_time=1.123e-04, forward_time=0.143, loss_ctc=69.549, loss_att=54.017, acc=0.715, loss=58.676, backward_time=1.024, grad_norm=105.138, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.680e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:46:39,103 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-12 16:46:57,326 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:47:00,716 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:47:00,716 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-12 16:47:00,722 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:52:43,273 (trainer:732) INFO: 40epoch:train:7501-7600batch: iter_time=1.295, forward_time=0.144, loss_ctc=58.813, loss_att=42.145, acc=0.710, loss=47.145, backward_time=1.037, grad_norm=124.111, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.679e-05, train_time=7.339 +[gpub002:0/64] 2023-07-12 16:54:59,882 (trainer:732) INFO: 40epoch:train:7601-7700batch: iter_time=1.243e-04, forward_time=0.144, loss_ctc=68.732, loss_att=53.937, acc=0.713, loss=58.375, backward_time=1.029, grad_norm=130.599, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.679e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 16:57:15,353 (trainer:732) INFO: 40epoch:train:7701-7800batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=77.851, loss_att=55.826, acc=0.727, loss=62.433, backward_time=1.025, grad_norm=121.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.678e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 16:59:30,902 (trainer:732) INFO: 40epoch:train:7801-7900batch: iter_time=1.444e-04, forward_time=0.144, loss_ctc=70.346, loss_att=53.701, acc=0.708, loss=58.695, backward_time=1.026, grad_norm=112.362, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.677e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 17:01:46,962 (trainer:732) INFO: 40epoch:train:7901-8000batch: iter_time=1.644e-04, forward_time=0.145, loss_ctc=69.909, loss_att=51.923, acc=0.720, loss=57.319, backward_time=1.024, grad_norm=115.655, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.676e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 17:04:02,181 (trainer:732) INFO: 40epoch:train:8001-8100batch: iter_time=1.097e-04, forward_time=0.143, loss_ctc=64.797, loss_att=45.585, acc=0.695, loss=51.348, backward_time=1.021, grad_norm=96.860, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.676e-05, train_time=2.704 +[gpub002:0/64] 2023-07-12 17:06:18,006 (trainer:732) INFO: 40epoch:train:8101-8200batch: iter_time=1.347e-04, forward_time=0.144, loss_ctc=65.946, loss_att=48.537, 
acc=0.721, loss=53.759, backward_time=1.025, grad_norm=103.143, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.675e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 17:08:43,714 (trainer:732) INFO: 40epoch:train:8201-8300batch: iter_time=1.585e-04, forward_time=0.144, loss_ctc=66.556, loss_att=50.587, acc=0.709, loss=55.378, backward_time=1.033, grad_norm=104.722, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.674e-05, train_time=2.914 +[gpub002:0/64] 2023-07-12 17:09:31,672 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-12 17:09:49,815 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 17:09:53,285 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 17:09:53,285 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-12 17:09:53,291 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 17:16:05,942 (trainer:732) INFO: 40epoch:train:8301-8400batch: iter_time=1.764, forward_time=0.145, loss_ctc=61.574, loss_att=47.462, acc=0.713, loss=51.695, backward_time=1.040, grad_norm=100.876, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.673e-05, train_time=8.844 +[gpub002:0/64] 2023-07-12 17:18:22,844 (trainer:732) INFO: 40epoch:train:8401-8500batch: iter_time=1.222e-04, forward_time=0.144, loss_ctc=66.028, loss_att=49.519, acc=0.712, loss=54.472, backward_time=1.025, grad_norm=100.831, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.673e-05, train_time=2.738 +[gpub002:0/64] 2023-07-12 17:20:39,127 (trainer:732) INFO: 40epoch:train:8501-8600batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=75.776, loss_att=55.115, acc=0.722, loss=61.313, backward_time=1.028, grad_norm=133.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.672e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 17:22:54,520 (trainer:732) INFO: 40epoch:train:8601-8700batch: iter_time=1.148e-04, forward_time=0.143, loss_ctc=71.264, loss_att=51.115, acc=0.722, loss=57.159, backward_time=1.024, grad_norm=108.773, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.671e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 17:25:10,311 (trainer:732) INFO: 40epoch:train:8701-8800batch: iter_time=1.039e-04, forward_time=0.144, loss_ctc=70.093, loss_att=53.908, acc=0.711, loss=58.763, backward_time=1.027, grad_norm=115.711, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.671e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 17:27:25,641 (trainer:732) INFO: 40epoch:train:8801-8900batch: iter_time=1.147e-04, forward_time=0.143, loss_ctc=65.537, loss_att=45.778, acc=0.707, loss=51.706, backward_time=1.025, grad_norm=109.775, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.670e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 17:29:40,885 
(trainer:732) INFO: 40epoch:train:8901-9000batch: iter_time=1.186e-04, forward_time=0.143, loss_ctc=64.933, loss_att=47.129, acc=0.721, loss=52.470, backward_time=1.024, grad_norm=108.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.669e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 17:31:56,246 (trainer:732) INFO: 40epoch:train:9001-9100batch: iter_time=1.121e-04, forward_time=0.143, loss_ctc=64.752, loss_att=50.086, acc=0.708, loss=54.486, backward_time=1.024, grad_norm=114.341, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.668e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 17:33:29,921 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-12 17:33:48,221 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 17:33:51,742 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 17:33:51,743 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-12 17:33:51,749 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 17:38:46,300 (trainer:732) INFO: 40epoch:train:9101-9200batch: iter_time=1.947, forward_time=0.180, loss_ctc=68.492, loss_att=49.647, acc=0.722, loss=55.300, backward_time=1.038, grad_norm=117.017, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.668e-05, train_time=8.201 +[gpub002:0/64] 2023-07-12 17:41:02,935 (trainer:732) INFO: 40epoch:train:9201-9300batch: iter_time=1.099e-04, forward_time=0.145, loss_ctc=61.780, loss_att=48.408, acc=0.705, loss=52.420, backward_time=1.025, grad_norm=130.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.667e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 17:43:20,427 (trainer:732) INFO: 40epoch:train:9301-9400batch: iter_time=1.202e-04, forward_time=0.144, loss_ctc=68.219, loss_att=52.508, acc=0.732, loss=57.222, backward_time=1.027, grad_norm=114.930, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.666e-05, train_time=2.750 +[gpub002:0/64] 2023-07-12 17:45:36,344 (trainer:732) INFO: 40epoch:train:9401-9500batch: iter_time=9.479e-05, forward_time=0.142, loss_ctc=78.737, loss_att=56.410, acc=0.723, loss=63.108, backward_time=1.025, grad_norm=120.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.665e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 17:47:51,855 (trainer:732) INFO: 40epoch:train:9501-9600batch: iter_time=9.460e-05, forward_time=0.143, loss_ctc=66.156, loss_att=54.034, acc=0.715, loss=57.671, backward_time=1.023, grad_norm=119.804, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.665e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 17:50:07,301 (trainer:732) INFO: 40epoch:train:9601-9700batch: iter_time=9.742e-05, forward_time=0.143, loss_ctc=69.405, loss_att=50.896, acc=0.723, loss=56.449, backward_time=1.024, grad_norm=105.723, 
clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.664e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 17:52:22,352 (trainer:732) INFO: 40epoch:train:9701-9800batch: iter_time=1.037e-04, forward_time=0.142, loss_ctc=62.756, loss_att=43.537, acc=0.712, loss=49.303, backward_time=1.022, grad_norm=114.282, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.663e-05, train_time=2.701 +[gpub002:0/64] 2023-07-12 17:54:44,234 (trainer:732) INFO: 40epoch:train:9801-9900batch: iter_time=9.713e-05, forward_time=0.143, loss_ctc=67.843, loss_att=50.651, acc=0.721, loss=55.809, backward_time=1.030, grad_norm=112.352, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.663e-05, train_time=2.837 +[gpub002:0/64] 2023-07-12 17:57:01,502 (trainer:732) INFO: 40epoch:train:9901-10000batch: iter_time=9.865e-05, forward_time=0.141, loss_ctc=69.656, loss_att=53.667, acc=0.714, loss=58.464, backward_time=1.028, grad_norm=130.803, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.662e-05, train_time=2.745 +[gpub002:0/64] 2023-07-12 18:11:36,573 (trainer:338) INFO: 40epoch results: [train] iter_time=0.178, forward_time=0.145, loss_ctc=68.748, loss_att=51.163, acc=0.712, loss=56.438, backward_time=1.027, grad_norm=116.730, clip=100.000, loss_scale=3.679e+32, optim_step_time=0.180, optim0_lr0=5.698e-05, train_time=3.327, time=4 hours, 37 minutes and 31.92 seconds, total_count=370000, gpu_max_cached_mem_GB=34.277, [valid] loss_ctc=44.137, cer_ctc=0.263, loss_att=39.500, acc=0.667, cer=0.428, wer=1.000, loss=40.891, time=7 minutes and 44.49 seconds, total_count=37950, gpu_max_cached_mem_GB=37.572, [att_plot] time=6 minutes and 32.9 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-12 18:11:52,961 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-12 18:11:53,003 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till40epoch.pth +[gpub002:0/64] 2023-07-12 18:12:44,008 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till40epoch.pth +[gpub002:0/64] 2023-07-12 18:13:08,270 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/35epoch.pth +[gpub002:0/64] 2023-07-12 18:13:08,326 (trainer:272) INFO: 41/50epoch started. Estimated time to finish: 2 days, 53 minutes and 30.81 seconds +[gpub002:0/64] 2023-07-12 18:13:09,583 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
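The two "Averaging 5best models" messages above produce valid.acc.ave_5best.till40epoch.pth and valid.total_count.ave_5best.till40epoch.pth by taking an element-wise mean over the parameters of the five best checkpoints under each criterion. A minimal sketch of that step, assuming each Xepoch.pth holds a plain PyTorch state dict; the helper name and usage paths are illustrative, and the real logic lives in espnet2's average_nbest_models:

    import torch

    def average_checkpoints(paths):
        """Element-wise mean of the parameter tensors stored in `paths`."""
        avg, dtypes = None, None
        for path in paths:
            state = torch.load(path, map_location="cpu")
            if avg is None:
                dtypes = {k: v.dtype for k, v in state.items()}
                avg = {k: v.double() for k, v in state.items()}
            else:
                for k, v in state.items():
                    avg[k] += v.double()
        # Accumulate in float64, then cast back to each tensor's original dtype.
        return {k: (v / len(paths)).to(dtypes[k]) for k, v in avg.items()}

    # Hypothetical usage with the five best epochs under valid.acc:
    # avg = average_checkpoints([f"{exp_dir}/{e}epoch.pth" for e in best5])
    # torch.save(avg, f"{exp_dir}/valid.acc.ave_5best.till40epoch.pth")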
+[gpub002:0/64] 2023-07-12 18:13:27,325 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 18:13:30,751 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 18:13:30,751 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-12 18:13:30,878 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 18:21:17,615 (trainer:732) INFO: 41epoch:train:1-100batch: iter_time=3.473, forward_time=0.171, loss_ctc=70.840, loss_att=55.420, acc=0.704, loss=60.046, backward_time=1.043, grad_norm=129.418, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.661e-05, train_time=9.772 +[gpub002:0/64] 2023-07-12 18:23:33,343 (trainer:732) INFO: 41epoch:train:101-200batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.748, loss_att=50.057, acc=0.705, loss=55.364, backward_time=1.028, grad_norm=114.263, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.660e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 18:25:50,201 (trainer:732) INFO: 41epoch:train:201-300batch: iter_time=1.232e-04, forward_time=0.152, loss_ctc=66.301, loss_att=51.918, acc=0.721, loss=56.233, backward_time=1.028, grad_norm=123.052, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.660e-05, train_time=2.737 +[gpub002:0/64] 2023-07-12 18:28:19,026 (trainer:732) INFO: 41epoch:train:301-400batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=75.210, loss_att=59.758, acc=0.716, loss=64.394, backward_time=1.054, grad_norm=126.516, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.659e-05, train_time=2.976 +[gpub002:0/64] 2023-07-12 18:30:40,311 (trainer:732) INFO: 41epoch:train:401-500batch: iter_time=1.063e-04, forward_time=0.146, loss_ctc=60.935, loss_att=47.224, acc=0.714, loss=51.338, backward_time=1.039, grad_norm=134.990, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.658e-05, train_time=2.825 +[gpub002:0/64] 2023-07-12 18:32:59,302 (trainer:732) INFO: 41epoch:train:501-600batch: iter_time=1.050e-04, forward_time=0.147, loss_ctc=75.633, loss_att=61.020, acc=0.718, loss=65.404, backward_time=1.032, grad_norm=146.761, clip=100.000, loss_scale=3.894e+32, optim_step_time=0.182, optim0_lr0=5.657e-05, train_time=2.780 +[gpub002:0/64] 2023-07-12 18:35:20,968 (trainer:732) INFO: 41epoch:train:601-700batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=70.522, loss_att=53.175, acc=0.708, loss=58.379, backward_time=1.040, grad_norm=114.115, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.657e-05, train_time=2.833 +[gpub002:0/64] 2023-07-12 18:37:47,401 (trainer:732) INFO: 41epoch:train:701-800batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=71.912, loss_att=54.658, acc=0.713, loss=59.834, backward_time=1.046, grad_norm=114.837, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.656e-05, 
train_time=2.928 +[gpub002:0/64] 2023-07-12 18:37:49,975 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 18:38:40,955 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-12 18:38:58,926 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 18:39:02,533 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 18:39:02,533 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-12 18:39:02,539 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 18:44:47,143 (trainer:732) INFO: 41epoch:train:801-900batch: iter_time=1.647, forward_time=0.146, loss_ctc=72.599, loss_att=54.115, acc=0.703, loss=59.660, backward_time=1.040, grad_norm=128.541, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.655e-05, train_time=8.395 +[gpub002:0/64] 2023-07-12 18:45:06,195 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 18:47:03,912 (trainer:732) INFO: 41epoch:train:901-1000batch: iter_time=1.239e-04, forward_time=0.145, loss_ctc=70.940, loss_att=54.535, acc=0.709, loss=59.457, backward_time=1.029, grad_norm=120.295, clip=100.000, loss_scale=1.821e+32, optim_step_time=0.182, optim0_lr0=5.655e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 18:49:19,533 (trainer:732) INFO: 41epoch:train:1001-1100batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=61.609, loss_att=44.306, acc=0.724, loss=49.497, backward_time=1.027, grad_norm=111.947, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.654e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 18:51:36,281 (trainer:732) INFO: 41epoch:train:1101-1200batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=73.032, loss_att=63.628, acc=0.705, loss=66.449, backward_time=1.034, grad_norm=164.160, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.653e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 18:53:52,356 (trainer:732) INFO: 41epoch:train:1201-1300batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=68.936, loss_att=48.977, acc=0.729, loss=54.965, backward_time=1.030, grad_norm=118.209, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.652e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 18:56:08,340 (trainer:732) INFO: 41epoch:train:1301-1400batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=72.457, loss_att=57.311, acc=0.711, loss=61.854, backward_time=1.030, grad_norm=123.795, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.652e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 18:58:24,327 (trainer:732) INFO: 41epoch:train:1401-1500batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=69.223, loss_att=53.363, acc=0.719, loss=58.121, backward_time=1.030, grad_norm=100.726, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.651e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 19:00:40,397 (trainer:732) INFO: 41epoch:train:1501-1600batch: iter_time=1.334e-04, forward_time=0.146, loss_ctc=71.626, loss_att=57.110, acc=0.711, loss=61.465, backward_time=1.031, grad_norm=128.152, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.650e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 19:02:11,569 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-12 19:02:29,844 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:02:33,282 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:02:33,282 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-12 19:02:33,289 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:08:06,638 (trainer:732) INFO: 41epoch:train:1601-1700batch: iter_time=1.676, forward_time=0.169, loss_ctc=70.421, loss_att=49.398, acc=0.710, loss=55.705, backward_time=1.041, grad_norm=138.098, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.650e-05, train_time=8.923 +[gpub002:0/64] 2023-07-12 19:10:23,384 (trainer:732) INFO: 41epoch:train:1701-1800batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=69.158, loss_att=55.267, acc=0.709, loss=59.434, backward_time=1.031, grad_norm=104.995, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.649e-05, train_time=2.736 +[gpub002:0/64] 2023-07-12 19:12:38,917 (trainer:732) INFO: 41epoch:train:1801-1900batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=66.374, loss_att=50.597, acc=0.699, loss=55.330, backward_time=1.027, grad_norm=112.628, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.648e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 19:14:54,622 (trainer:732) INFO: 41epoch:train:1901-2000batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=68.154, loss_att=57.325, acc=0.709, loss=60.574, backward_time=1.027, grad_norm=103.678, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.647e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 19:17:10,478 (trainer:732) INFO: 41epoch:train:2001-2100batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=72.381, loss_att=54.045, acc=0.729, loss=59.546, backward_time=1.027, grad_norm=114.921, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.647e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 19:19:26,375 (trainer:732) INFO: 41epoch:train:2101-2200batch: iter_time=1.268e-04, forward_time=0.145, loss_ctc=68.777, loss_att=54.166, acc=0.702, loss=58.549, backward_time=1.028, grad_norm=113.943, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.646e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 19:21:42,063 (trainer:732) INFO: 41epoch:train:2201-2300batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=63.800, 
loss_att=49.285, acc=0.717, loss=53.640, backward_time=1.027, grad_norm=146.299, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.645e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 19:23:57,840 (trainer:732) INFO: 41epoch:train:2301-2400batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=74.630, loss_att=56.323, acc=0.712, loss=61.815, backward_time=1.028, grad_norm=111.859, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.644e-05, train_time=2.715 +[gpub002:0/64] 2023-07-12 19:26:19,254 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-12 19:26:37,400 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:26:40,808 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:26:40,808 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-12 19:26:40,814 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:29:54,444 (trainer:732) INFO: 41epoch:train:2401-2500batch: iter_time=2.164, forward_time=0.165, loss_ctc=73.293, loss_att=57.191, acc=0.704, loss=62.022, backward_time=1.035, grad_norm=154.823, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.644e-05, train_time=7.132 +[gpub002:0/64] 2023-07-12 19:32:11,981 (trainer:732) INFO: 41epoch:train:2501-2600batch: iter_time=1.086e-04, forward_time=0.145, loss_ctc=70.299, loss_att=55.298, acc=0.703, loss=59.798, backward_time=1.037, grad_norm=109.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.643e-05, train_time=2.751 +[gpub002:0/64] 2023-07-12 19:34:27,583 (trainer:732) INFO: 41epoch:train:2601-2700batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=65.933, loss_att=48.206, acc=0.704, loss=53.524, backward_time=1.028, grad_norm=91.627, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.642e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 19:36:43,388 (trainer:732) INFO: 41epoch:train:2701-2800batch: iter_time=1.208e-04, forward_time=0.147, loss_ctc=64.547, loss_att=51.693, acc=0.716, loss=55.549, backward_time=1.027, grad_norm=123.357, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.642e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 19:38:59,319 (trainer:732) INFO: 41epoch:train:2801-2900batch: iter_time=1.147e-04, forward_time=0.145, loss_ctc=74.813, loss_att=58.932, acc=0.715, loss=63.696, backward_time=1.030, grad_norm=122.567, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.641e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 19:41:15,045 (trainer:732) INFO: 41epoch:train:2901-3000batch: iter_time=1.152e-04, forward_time=0.145, loss_ctc=61.440, loss_att=45.559, acc=0.725, loss=50.324, backward_time=1.028, grad_norm=124.359, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.640e-05, train_time=2.714 +[gpub002:0/64] 
2023-07-12 19:43:31,121 (trainer:732) INFO: 41epoch:train:3001-3100batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=73.113, loss_att=58.408, acc=0.712, loss=62.819, backward_time=1.028, grad_norm=132.870, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.639e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 19:45:49,866 (trainer:732) INFO: 41epoch:train:3101-3200batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=70.690, loss_att=51.235, acc=0.716, loss=57.071, backward_time=1.033, grad_norm=113.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.639e-05, train_time=2.775 +[gpub002:0/64] 2023-07-12 19:48:12,230 (trainer:732) INFO: 41epoch:train:3201-3300batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=72.441, loss_att=57.254, acc=0.711, loss=61.810, backward_time=1.033, grad_norm=107.815, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.638e-05, train_time=2.847 +[gpub002:0/64] 2023-07-12 19:49:02,365 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-12 19:49:20,319 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:49:23,731 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:49:23,731 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-12 19:49:23,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:54:17,214 (trainer:732) INFO: 41epoch:train:3301-3400batch: iter_time=1.844, forward_time=0.147, loss_ctc=67.576, loss_att=49.521, acc=0.713, loss=54.938, backward_time=1.043, grad_norm=133.347, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.637e-05, train_time=7.299 +[gpub002:0/64] 2023-07-12 19:56:33,870 (trainer:732) INFO: 41epoch:train:3401-3500batch: iter_time=1.057e-04, forward_time=0.146, loss_ctc=70.531, loss_att=54.713, acc=0.713, loss=59.458, backward_time=1.030, grad_norm=130.288, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.637e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 19:58:49,517 (trainer:732) INFO: 41epoch:train:3501-3600batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=60.897, loss_att=42.699, acc=0.730, loss=48.158, backward_time=1.028, grad_norm=137.043, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.636e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 20:01:05,892 (trainer:732) INFO: 41epoch:train:3601-3700batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=74.026, loss_att=64.166, acc=0.709, loss=67.124, backward_time=1.032, grad_norm=127.815, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.635e-05, train_time=2.727 +[gpub002:0/64] 2023-07-12 20:03:21,618 (trainer:732) INFO: 41epoch:train:3701-3800batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=67.233, loss_att=48.554, acc=0.733, loss=54.158, backward_time=1.028, 
grad_norm=114.242, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.634e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 20:05:37,755 (trainer:732) INFO: 41epoch:train:3801-3900batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=70.919, loss_att=56.270, acc=0.718, loss=60.665, backward_time=1.031, grad_norm=122.264, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.634e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 20:07:53,980 (trainer:732) INFO: 41epoch:train:3901-4000batch: iter_time=1.098e-04, forward_time=0.146, loss_ctc=69.754, loss_att=53.402, acc=0.720, loss=58.308, backward_time=1.032, grad_norm=108.612, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.633e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 20:10:10,380 (trainer:732) INFO: 41epoch:train:4001-4100batch: iter_time=1.024e-04, forward_time=0.147, loss_ctc=70.275, loss_att=56.231, acc=0.716, loss=60.444, backward_time=1.032, grad_norm=117.778, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.632e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 20:11:39,912 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-12 20:11:58,290 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 20:12:01,692 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 20:12:01,692 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-12 20:12:01,698 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 20:17:26,480 (trainer:732) INFO: 41epoch:train:4101-4200batch: iter_time=1.566, forward_time=0.146, loss_ctc=72.772, loss_att=55.012, acc=0.708, loss=60.340, backward_time=1.042, grad_norm=117.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.632e-05, train_time=8.722 +[gpub002:0/64] 2023-07-12 20:19:42,895 (trainer:732) INFO: 41epoch:train:4201-4300batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=67.515, loss_att=51.939, acc=0.704, loss=56.612, backward_time=1.030, grad_norm=123.653, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.631e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 20:21:58,413 (trainer:732) INFO: 41epoch:train:4301-4400batch: iter_time=1.320e-04, forward_time=0.145, loss_ctc=61.290, loss_att=47.503, acc=0.716, loss=51.639, backward_time=1.025, grad_norm=112.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.630e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 20:24:14,368 (trainer:732) INFO: 41epoch:train:4401-4500batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=71.825, loss_att=62.339, acc=0.697, loss=65.185, backward_time=1.029, grad_norm=117.005, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.629e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:26:30,216 (trainer:732) INFO: 
41epoch:train:4501-4600batch: iter_time=1.354e-04, forward_time=0.145, loss_ctc=64.044, loss_att=44.250, acc=0.731, loss=50.189, backward_time=1.028, grad_norm=95.630, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.629e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 20:28:46,011 (trainer:732) INFO: 41epoch:train:4601-4700batch: iter_time=1.354e-04, forward_time=0.144, loss_ctc=72.970, loss_att=56.752, acc=0.705, loss=61.617, backward_time=1.029, grad_norm=111.368, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.628e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 20:31:02,650 (trainer:732) INFO: 41epoch:train:4701-4800batch: iter_time=1.450e-04, forward_time=0.144, loss_ctc=71.101, loss_att=53.521, acc=0.717, loss=58.795, backward_time=1.029, grad_norm=100.929, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.627e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 20:33:18,834 (trainer:732) INFO: 41epoch:train:4801-4900batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=72.523, loss_att=58.663, acc=0.707, loss=62.821, backward_time=1.031, grad_norm=127.161, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.627e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 20:35:36,747 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-12 20:35:54,833 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 20:35:58,239 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 20:35:58,239 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-12 20:35:58,246 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 20:41:04,776 (trainer:732) INFO: 41epoch:train:4901-5000batch: iter_time=1.597, forward_time=0.146, loss_ctc=69.320, loss_att=46.915, acc=0.725, loss=53.637, backward_time=1.038, grad_norm=113.892, clip=100.000, loss_scale=3.018e+32, optim_step_time=0.182, optim0_lr0=5.626e-05, train_time=9.319 +[gpub002:0/64] 2023-07-12 20:43:22,964 (trainer:732) INFO: 41epoch:train:5001-5100batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=68.557, loss_att=53.531, acc=0.716, loss=58.039, backward_time=1.036, grad_norm=109.852, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.625e-05, train_time=2.764 +[gpub002:0/64] 2023-07-12 20:45:38,905 (trainer:732) INFO: 41epoch:train:5101-5200batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=65.544, loss_att=48.308, acc=0.716, loss=53.479, backward_time=1.029, grad_norm=123.663, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.624e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:47:55,428 (trainer:732) INFO: 41epoch:train:5201-5300batch: iter_time=9.594e-05, forward_time=0.147, loss_ctc=69.697, loss_att=59.000, acc=0.717, loss=62.209, backward_time=1.031, grad_norm=109.695, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.624e-05, train_time=2.730 +[gpub002:0/64] 2023-07-12 20:50:11,373 (trainer:732) INFO: 41epoch:train:5301-5400batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=71.695, loss_att=51.848, acc=0.732, loss=57.802, backward_time=1.029, grad_norm=105.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.623e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:52:26,937 (trainer:732) INFO: 41epoch:train:5401-5500batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=69.713, loss_att=56.136, acc=0.698, loss=60.209, backward_time=1.026, grad_norm=124.763, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.622e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 20:54:42,808 (trainer:732) INFO: 41epoch:train:5501-5600batch: iter_time=1.183e-04, forward_time=0.145, loss_ctc=65.673, loss_att=49.982, acc=0.728, loss=54.690, backward_time=1.028, grad_norm=121.869, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.622e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 20:56:58,543 (trainer:732) INFO: 41epoch:train:5601-5700batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=72.242, loss_att=52.803, acc=0.723, loss=58.635, backward_time=1.027, grad_norm=111.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.621e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 20:59:14,728 (trainer:732) INFO: 41epoch:train:5701-5800batch: iter_time=9.577e-05, forward_time=0.145, loss_ctc=69.925, loss_att=54.850, acc=0.714, loss=59.373, backward_time=1.031, grad_norm=113.233, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.620e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 21:00:01,150 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
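In every record here the overall loss sits between loss_att and loss_ctc because it is a hybrid CTC/attention objective with ctc_weight = 0.3: in the 5701-5800 record above, 0.3 * 69.925 + 0.7 * 54.850 = 59.373, matching loss=59.373. A one-line sketch of the combination (the weight is inferred from the logged numbers, not read from the config):

    def hybrid_loss(loss_ctc, loss_att, ctc_weight=0.3):
        """Hybrid CTC/attention objective consistent with the logged values:
        loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att,
        e.g. 0.3 * 69.925 + 0.7 * 54.850 = 59.373 (record 5701-5800)."""
        return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att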
+[gpub002:0/64] 2023-07-12 21:00:19,189 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:00:22,555 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:00:22,555 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-12 21:00:22,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:05:30,215 (trainer:732) INFO: 41epoch:train:5801-5900batch: iter_time=1.645, forward_time=0.193, loss_ctc=72.625, loss_att=53.896, acc=0.723, loss=59.515, backward_time=1.042, grad_norm=126.902, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.619e-05, train_time=7.509 +[gpub002:0/64] 2023-07-12 21:07:46,821 (trainer:732) INFO: 41epoch:train:5901-6000batch: iter_time=1.429e-04, forward_time=0.147, loss_ctc=66.989, loss_att=49.786, acc=0.709, loss=54.947, backward_time=1.029, grad_norm=130.905, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.619e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 21:10:03,539 (trainer:732) INFO: 41epoch:train:6001-6100batch: iter_time=1.134e-04, forward_time=0.149, loss_ctc=64.189, loss_att=51.518, acc=0.726, loss=55.319, backward_time=1.031, grad_norm=103.534, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.618e-05, train_time=2.734 +[gpub002:0/64] 2023-07-12 21:12:19,396 (trainer:732) INFO: 41epoch:train:6101-6200batch: iter_time=1.098e-04, forward_time=0.146, loss_ctc=72.559, loss_att=55.954, acc=0.724, loss=60.936, backward_time=1.029, grad_norm=122.019, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.617e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 21:14:35,307 (trainer:732) INFO: 41epoch:train:6201-6300batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=64.191, loss_att=50.498, acc=0.713, loss=54.606, backward_time=1.028, grad_norm=114.048, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.617e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 21:15:07,857 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 21:16:51,122 (trainer:732) INFO: 41epoch:train:6301-6400batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=70.103, loss_att=54.476, acc=0.732, loss=59.164, backward_time=1.030, grad_norm=103.656, clip=100.000, loss_scale=1.987e+32, optim_step_time=0.183, optim0_lr0=5.616e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:17:42,514 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
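The two warnings above are mixed-precision overflow handling: when the unscaled gradient norm comes back non-finite, the optimizer step is skipped and the dynamic loss scale is backed off, which is why loss_scale falls from 3.245e+32 toward 8.113e+31 over the surrounding records (the printed value is an average over each 100-batch window, so intermediate values appear). A minimal sketch of one such update with torch.cuda.amp.GradScaler, mirroring this behaviour under illustrative names:

    import torch

    def amp_step(scaler, optimizer, model, loss, max_norm=100.0):
        """One AMP update: clip to max_norm (clip=100.000 in the log) and
        skip the step on a non-finite gradient, as in the warnings above."""
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)                      # grads back to true scale
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        if not torch.isfinite(grad_norm):
            print("The grad norm is nan. Skipping updating the model.")
        scaler.step(optimizer)   # no-op if non-finite grads were found
        scaler.update()          # backoff (x0.5) on overflow, growth after a streak
        optimizer.zero_grad()
        return grad_norm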
+[gpub002:0/64] 2023-07-12 21:19:06,809 (trainer:732) INFO: 41epoch:train:6401-6500batch: iter_time=1.398e-04, forward_time=0.146, loss_ctc=70.713, loss_att=52.907, acc=0.719, loss=58.249, backward_time=1.028, grad_norm=129.552, clip=100.000, loss_scale=1.109e+32, optim_step_time=0.182, optim0_lr0=5.615e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 21:21:23,047 (trainer:732) INFO: 41epoch:train:6501-6600batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=71.542, loss_att=55.074, acc=0.720, loss=60.015, backward_time=1.031, grad_norm=119.497, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.615e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 21:23:04,288 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-12 21:23:22,599 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:23:26,047 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:23:26,047 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-12 21:23:26,053 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:28:01,974 (trainer:732) INFO: 41epoch:train:6601-6700batch: iter_time=2.543, forward_time=0.146, loss_ctc=71.485, loss_att=51.860, acc=0.717, loss=57.748, backward_time=1.039, grad_norm=121.242, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.614e-05, train_time=7.978 +[gpub002:0/64] 2023-07-12 21:30:19,098 (trainer:732) INFO: 41epoch:train:6701-6800batch: iter_time=1.407e-04, forward_time=0.145, loss_ctc=67.783, loss_att=52.647, acc=0.706, loss=57.188, backward_time=1.030, grad_norm=102.934, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.613e-05, train_time=2.742 +[gpub002:0/64] 2023-07-12 21:32:35,143 (trainer:732) INFO: 41epoch:train:6801-6900batch: iter_time=1.370e-04, forward_time=0.149, loss_ctc=60.432, loss_att=44.809, acc=0.723, loss=49.496, backward_time=1.030, grad_norm=143.111, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.612e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 21:34:51,046 (trainer:732) INFO: 41epoch:train:6901-7000batch: iter_time=1.716e-04, forward_time=0.147, loss_ctc=73.656, loss_att=65.329, acc=0.698, loss=67.827, backward_time=1.029, grad_norm=127.853, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.612e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 21:37:06,844 (trainer:732) INFO: 41epoch:train:7001-7100batch: iter_time=1.519e-04, forward_time=0.147, loss_ctc=63.569, loss_att=45.390, acc=0.737, loss=50.844, backward_time=1.028, grad_norm=115.348, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.611e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:39:22,832 (trainer:732) INFO: 41epoch:train:7101-7200batch: iter_time=1.483e-04, forward_time=0.147, loss_ctc=73.348, loss_att=57.141, acc=0.707, loss=62.003, 
backward_time=1.030, grad_norm=125.156, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.610e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 21:41:38,634 (trainer:732) INFO: 41epoch:train:7201-7300batch: iter_time=1.409e-04, forward_time=0.145, loss_ctc=68.433, loss_att=52.831, acc=0.715, loss=57.512, backward_time=1.026, grad_norm=117.453, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.610e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:43:54,806 (trainer:732) INFO: 41epoch:train:7301-7400batch: iter_time=1.454e-04, forward_time=0.148, loss_ctc=69.535, loss_att=57.942, acc=0.709, loss=61.420, backward_time=1.031, grad_norm=142.520, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.609e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 21:46:12,782 (trainer:732) INFO: 41epoch:train:7401-7500batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=67.374, loss_att=45.684, acc=0.729, loss=52.191, backward_time=1.031, grad_norm=135.947, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.608e-05, train_time=2.759 +[gpub002:0/64] 2023-07-12 21:46:15,854 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-12 21:46:34,163 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:46:37,582 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:46:37,582 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 21:46:37,588 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:54:09,248 (trainer:732) INFO: 41epoch:train:7501-7600batch: iter_time=1.603, forward_time=0.145, loss_ctc=71.396, loss_att=58.603, acc=0.700, loss=62.441, backward_time=1.042, grad_norm=141.498, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.607e-05, train_time=9.529 +[gpub002:0/64] 2023-07-12 21:56:25,789 (trainer:732) INFO: 41epoch:train:7601-7700batch: iter_time=1.471e-04, forward_time=0.144, loss_ctc=66.114, loss_att=48.625, acc=0.707, loss=53.872, backward_time=1.029, grad_norm=119.762, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.607e-05, train_time=2.731 +[gpub002:0/64] 2023-07-12 21:58:41,647 (trainer:732) INFO: 41epoch:train:7701-7800batch: iter_time=1.349e-04, forward_time=0.145, loss_ctc=63.466, loss_att=50.427, acc=0.716, loss=54.339, backward_time=1.028, grad_norm=113.922, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.606e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 22:00:59,471 (trainer:732) INFO: 41epoch:train:7801-7900batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=72.326, loss_att=56.756, acc=0.720, loss=61.427, backward_time=1.031, grad_norm=109.457, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.605e-05, train_time=2.756 +[gpub002:0/64] 2023-07-12 22:03:19,497 (trainer:732) INFO: 
41epoch:train:7901-8000batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=60.819, loss_att=44.765, acc=0.722, loss=49.581, backward_time=1.033, grad_norm=106.824, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.605e-05, train_time=2.800 +[gpub002:0/64] 2023-07-12 22:05:35,694 (trainer:732) INFO: 41epoch:train:8001-8100batch: iter_time=1.268e-04, forward_time=0.146, loss_ctc=75.424, loss_att=59.810, acc=0.714, loss=64.495, backward_time=1.030, grad_norm=123.469, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.604e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 22:07:51,699 (trainer:732) INFO: 41epoch:train:8101-8200batch: iter_time=1.220e-04, forward_time=0.147, loss_ctc=69.288, loss_att=51.179, acc=0.716, loss=56.612, backward_time=1.029, grad_norm=117.088, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.603e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 22:10:07,627 (trainer:732) INFO: 41epoch:train:8201-8300batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=72.391, loss_att=55.439, acc=0.714, loss=60.525, backward_time=1.029, grad_norm=127.931, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.603e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 22:10:55,464 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-12 22:11:13,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 22:11:17,494 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 22:11:17,494 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-12 22:11:17,500 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 22:18:38,342 (trainer:732) INFO: 41epoch:train:8301-8400batch: iter_time=1.615, forward_time=0.147, loss_ctc=67.249, loss_att=51.366, acc=0.704, loss=56.131, backward_time=1.046, grad_norm=129.432, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.602e-05, train_time=10.214 +[gpub002:0/64] 2023-07-12 22:20:55,423 (trainer:732) INFO: 41epoch:train:8401-8500batch: iter_time=9.569e-05, forward_time=0.145, loss_ctc=68.689, loss_att=52.556, acc=0.712, loss=57.396, backward_time=1.031, grad_norm=129.356, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.601e-05, train_time=2.741 +[gpub002:0/64] 2023-07-12 22:23:11,495 (trainer:732) INFO: 41epoch:train:8501-8600batch: iter_time=9.187e-05, forward_time=0.146, loss_ctc=62.059, loss_att=44.195, acc=0.721, loss=49.554, backward_time=1.028, grad_norm=111.154, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.600e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 22:25:27,686 (trainer:732) INFO: 41epoch:train:8601-8700batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=71.896, loss_att=62.165, acc=0.700, loss=65.084, backward_time=1.030, grad_norm=133.087, clip=100.000, 
loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.600e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 22:27:43,333 (trainer:732) INFO: 41epoch:train:8701-8800batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=68.306, loss_att=48.020, acc=0.733, loss=54.106, backward_time=1.027, grad_norm=109.698, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.599e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 22:29:58,990 (trainer:732) INFO: 41epoch:train:8801-8900batch: iter_time=1.056e-04, forward_time=0.145, loss_ctc=70.034, loss_att=54.974, acc=0.709, loss=59.492, backward_time=1.026, grad_norm=125.573, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.598e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 22:32:14,802 (trainer:732) INFO: 41epoch:train:8901-9000batch: iter_time=9.824e-05, forward_time=0.146, loss_ctc=70.258, loss_att=54.489, acc=0.715, loss=59.220, backward_time=1.028, grad_norm=114.409, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.598e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 22:34:31,075 (trainer:732) INFO: 41epoch:train:9001-9100batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=68.492, loss_att=54.068, acc=0.716, loss=58.395, backward_time=1.029, grad_norm=140.544, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.597e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 22:36:03,875 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-12 22:36:22,352 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 22:36:26,145 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 22:36:26,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-12 22:36:26,151 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 22:42:43,033 (trainer:732) INFO: 41epoch:train:9101-9200batch: iter_time=1.881, forward_time=0.184, loss_ctc=68.469, loss_att=50.264, acc=0.718, loss=55.726, backward_time=1.044, grad_norm=106.621, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.596e-05, train_time=9.838 +[gpub002:0/64] 2023-07-12 22:45:00,788 (trainer:732) INFO: 41epoch:train:9201-9300batch: iter_time=1.293e-04, forward_time=0.148, loss_ctc=68.471, loss_att=53.732, acc=0.720, loss=58.153, backward_time=1.035, grad_norm=122.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.596e-05, train_time=2.755 +[gpub002:0/64] 2023-07-12 22:47:19,808 (trainer:732) INFO: 41epoch:train:9301-9400batch: iter_time=1.176e-04, forward_time=0.147, loss_ctc=65.914, loss_att=48.978, acc=0.714, loss=54.059, backward_time=1.037, grad_norm=109.971, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.595e-05, train_time=2.780 +[gpub002:0/64] 2023-07-12 22:49:36,096 (trainer:732) INFO: 41epoch:train:9401-9500batch: iter_time=1.167e-04, 
forward_time=0.146, loss_ctc=68.014, loss_att=54.562, acc=0.726, loss=58.597, backward_time=1.029, grad_norm=117.199, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.594e-05, train_time=2.726 +[gpub002:0/64] 2023-07-12 22:51:51,997 (trainer:732) INFO: 41epoch:train:9501-9600batch: iter_time=1.252e-04, forward_time=0.146, loss_ctc=71.249, loss_att=53.735, acc=0.733, loss=58.989, backward_time=1.029, grad_norm=138.178, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.593e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 22:54:07,882 (trainer:732) INFO: 41epoch:train:9601-9700batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=66.076, loss_att=52.907, acc=0.713, loss=56.858, backward_time=1.028, grad_norm=106.233, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.593e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 22:56:23,823 (trainer:732) INFO: 41epoch:train:9701-9800batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=62.985, loss_att=48.457, acc=0.729, loss=52.816, backward_time=1.029, grad_norm=112.368, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.592e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 22:58:40,560 (trainer:732) INFO: 41epoch:train:9801-9900batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=74.657, loss_att=55.990, acc=0.719, loss=61.590, backward_time=1.027, grad_norm=113.466, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.591e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 23:00:56,830 (trainer:732) INFO: 41epoch:train:9901-10000batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=72.076, loss_att=56.216, acc=0.714, loss=60.974, backward_time=1.030, grad_norm=131.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.591e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 23:18:11,678 (trainer:338) INFO: 41epoch results: [train] iter_time=0.233, forward_time=0.147, loss_ctc=69.121, loss_att=53.257, acc=0.715, loss=58.016, backward_time=1.032, grad_norm=120.506, clip=100.000, loss_scale=1.781e+32, optim_step_time=0.182, optim0_lr0=5.626e-05, train_time=3.453, time=4 hours, 48 minutes and 7.1 seconds, total_count=380000, gpu_max_cached_mem_GB=37.572, [valid] loss_ctc=44.125, cer_ctc=0.260, loss_att=39.542, acc=0.666, cer=0.430, wer=0.999, loss=40.917, time=8 minutes and 29.09 seconds, total_count=38962, gpu_max_cached_mem_GB=37.572, [att_plot] time=8 minutes and 26.85 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-12 23:18:30,669 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-12 23:18:30,800 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/36epoch.pth +[gpub002:0/64] 2023-07-12 23:18:30,800 (trainer:272) INFO: 42/50epoch started. Estimated time to finish: 1 day, 20 hours and 54 minutes +[gpub002:0/64] 2023-07-12 23:18:31,369 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
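The slowly decaying optim0_lr0 (averaging 5.626e-05 over epoch 41 in the results above) follows the warmup-then-inverse-square-root schedule named in the experiment directory (lr2.5e-4_warmup10k). A sketch of ESPnet's WarmupLR rule with those hyperparameters; note the scheduler's internal step counter need not equal the logged total_count (for example under gradient accumulation):

    def warmup_lr(step, base_lr=2.5e-4, warmup_steps=10_000):
        """WarmupLR: linear ramp to base_lr over warmup_steps, then
        inverse-square-root decay; peaks at base_lr when step == warmup_steps."""
        return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)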
+[gpub002:0/64] 2023-07-12 23:18:49,582 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 23:18:55,229 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 23:18:55,229 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 23:18:55,791 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 23:37:37,107 (trainer:732) INFO: 42epoch:train:1-100batch: iter_time=8.724, forward_time=1.113, loss_ctc=71.175, loss_att=49.430, acc=0.718, loss=55.953, backward_time=1.227, grad_norm=114.213, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=5.590e-05, train_time=22.913 +[gpub002:0/64] 2023-07-12 23:42:11,727 (trainer:732) INFO: 42epoch:train:101-200batch: iter_time=0.006, forward_time=0.982, loss_ctc=69.942, loss_att=54.374, acc=0.696, loss=59.044, backward_time=1.246, grad_norm=121.482, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.288, optim0_lr0=5.589e-05, train_time=5.491 +[gpub002:0/64] 2023-07-12 23:47:05,839 (trainer:732) INFO: 42epoch:train:201-300batch: iter_time=0.017, forward_time=1.206, loss_ctc=61.801, loss_att=45.949, acc=0.719, loss=50.705, backward_time=1.243, grad_norm=123.757, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.240, optim0_lr0=5.589e-05, train_time=5.881 +[gpub002:0/64] 2023-07-12 23:50:49,717 (trainer:732) INFO: 42epoch:train:301-400batch: iter_time=0.004, forward_time=0.743, loss_ctc=73.412, loss_att=51.589, acc=0.721, loss=58.136, backward_time=1.151, grad_norm=125.779, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.222, optim0_lr0=5.588e-05, train_time=4.480 +[gpub002:0/64] 2023-07-12 23:54:19,195 (trainer:732) INFO: 42epoch:train:401-500batch: iter_time=0.012, forward_time=0.551, loss_ctc=82.982, loss_att=60.880, acc=0.688, loss=67.511, backward_time=1.134, grad_norm=124.069, clip=100.000, loss_scale=1.314e+32, optim_step_time=0.209, optim0_lr0=5.587e-05, train_time=4.189 +[gpub002:0/64] 2023-07-12 23:57:28,757 (trainer:732) INFO: 42epoch:train:501-600batch: iter_time=0.002, forward_time=0.518, loss_ctc=78.082, loss_att=61.663, acc=0.708, loss=66.589, backward_time=1.094, grad_norm=124.871, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.201, optim0_lr0=5.586e-05, train_time=3.791 +[gpub002:0/64] 2023-07-13 00:00:25,025 (trainer:732) INFO: 42epoch:train:601-700batch: iter_time=6.864e-04, forward_time=0.420, loss_ctc=79.441, loss_att=60.935, acc=0.691, loss=66.487, backward_time=1.078, grad_norm=128.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.199, optim0_lr0=5.586e-05, train_time=3.526 +[gpub002:0/64] 2023-07-13 00:03:02,236 (trainer:732) INFO: 42epoch:train:701-800batch: iter_time=4.233e-04, forward_time=0.282, loss_ctc=72.996, loss_att=53.014, acc=0.712, loss=59.009, backward_time=1.066, grad_norm=122.990, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.197, optim0_lr0=5.585e-05, train_time=3.144 
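Each trainer record above is a flat list of key=value metrics, so downstream analysis of this log (learning curves, or spotting the slow first window after every iter-factory rebuild, e.g. iter_time=8.724 and train_time=22.913 at the start of epoch 42) reduces to parsing. A small, hypothetical helper for the train records only (not part of ESPnet; the epoch-results lines use a different format):

    import re

    RECORD = re.compile(r"(?P<epoch>\d+)epoch:train:(?P<span>\d+-\d+)batch: (?P<kv>.+)")

    def parse_record(line):
        """Parse '41epoch:train:9901-10000batch: iter_time=..., ...' into
        (epoch, batch_span, {metric: value})."""
        m = RECORD.search(line)
        if m is None:
            return None
        metrics = {}
        for pair in m.group("kv").split(", "):
            key, _, value = pair.partition("=")
            try:
                metrics[key] = float(value)
            except ValueError:
                metrics[key] = value
        return int(m.group("epoch")), m.group("span"), metrics

    # parse_record(line)[2]["train_time"] -> 22.913 for the epoch-42 1-100 record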
+[gpub002:0/64] 2023-07-13 00:04:00,778 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 00:04:18,719 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:04:22,403 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:04:22,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 00:04:22,409 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 00:09:56,081 (trainer:732) INFO: 42epoch:train:801-900batch: iter_time=2.590, forward_time=0.185, loss_ctc=73.051, loss_att=54.592, acc=0.715, loss=60.130, backward_time=1.048, grad_norm=128.053, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.584e-05, train_time=8.279 +[gpub002:0/64] 2023-07-13 00:12:13,867 (trainer:732) INFO: 42epoch:train:901-1000batch: iter_time=1.283e-04, forward_time=0.148, loss_ctc=66.524, loss_att=49.934, acc=0.708, loss=54.911, backward_time=1.035, grad_norm=121.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.584e-05, train_time=2.756 +[gpub002:0/64] 2023-07-13 00:14:30,359 (trainer:732) INFO: 42epoch:train:1001-1100batch: iter_time=1.139e-04, forward_time=0.147, loss_ctc=61.394, loss_att=46.990, acc=0.713, loss=51.311, backward_time=1.031, grad_norm=98.338, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.583e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 00:16:46,091 (trainer:732) INFO: 42epoch:train:1101-1200batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=68.116, loss_att=48.127, acc=0.735, loss=54.124, backward_time=1.028, grad_norm=118.499, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.582e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 00:19:03,313 (trainer:732) INFO: 42epoch:train:1201-1300batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=78.528, loss_att=57.137, acc=0.710, loss=63.554, backward_time=1.036, grad_norm=124.867, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.582e-05, train_time=2.744 +[gpub002:0/64] 2023-07-13 00:21:20,377 (trainer:732) INFO: 42epoch:train:1301-1400batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=79.593, loss_att=62.972, acc=0.704, loss=67.958, backward_time=1.031, grad_norm=134.092, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.581e-05, train_time=2.741 +[gpub002:0/64] 2023-07-13 00:23:54,097 (trainer:732) INFO: 42epoch:train:1401-1500batch: iter_time=1.091e-04, forward_time=0.145, loss_ctc=74.007, loss_att=54.221, acc=0.718, loss=60.157, backward_time=1.048, grad_norm=127.878, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.580e-05, train_time=3.074 +[gpub002:0/64] 2023-07-13 00:26:09,909 (trainer:732) INFO: 42epoch:train:1501-1600batch: iter_time=1.349e-04, forward_time=0.144, loss_ctc=75.275, loss_att=58.574, acc=0.702, loss=63.584, 
backward_time=1.029, grad_norm=121.251, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.579e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 00:27:59,966 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 00:28:18,102 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:28:22,132 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:28:22,141 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 00:28:22,317 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 00:35:20,703 (trainer:732) INFO: 42epoch:train:1601-1700batch: iter_time=4.060, forward_time=0.198, loss_ctc=75.398, loss_att=54.018, acc=0.726, loss=60.432, backward_time=1.041, grad_norm=136.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.579e-05, train_time=11.016 +[gpub002:0/64] 2023-07-13 00:36:32,130 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 00:37:37,330 (trainer:732) INFO: 42epoch:train:1701-1800batch: iter_time=1.140e-04, forward_time=0.145, loss_ctc=64.814, loss_att=48.109, acc=0.703, loss=53.121, backward_time=1.033, grad_norm=135.699, clip=100.000, loss_scale=1.225e+32, optim_step_time=0.182, optim0_lr0=5.578e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 00:39:53,639 (trainer:732) INFO: 42epoch:train:1801-1900batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=62.019, loss_att=46.732, acc=0.717, loss=51.318, backward_time=1.032, grad_norm=99.223, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.577e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 00:42:09,855 (trainer:732) INFO: 42epoch:train:1901-2000batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=68.823, loss_att=48.017, acc=0.738, loss=54.259, backward_time=1.028, grad_norm=118.420, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.577e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 00:44:25,673 (trainer:732) INFO: 42epoch:train:2001-2100batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=81.313, loss_att=58.446, acc=0.706, loss=65.306, backward_time=1.028, grad_norm=134.672, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.576e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 00:46:41,649 (trainer:732) INFO: 42epoch:train:2101-2200batch: iter_time=1.066e-04, forward_time=0.146, loss_ctc=78.344, loss_att=62.937, acc=0.702, loss=67.559, backward_time=1.030, grad_norm=151.716, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.575e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 00:48:57,917 (trainer:732) INFO: 42epoch:train:2201-2300batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=75.719, loss_att=57.616, acc=0.712, loss=63.047, backward_time=1.032, grad_norm=184.811, clip=100.000, loss_scale=8.113e+31, 
optim_step_time=0.181, optim0_lr0=5.575e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 00:51:28,156 (trainer:732) INFO: 42epoch:train:2301-2400batch: iter_time=1.113e-04, forward_time=0.147, loss_ctc=73.652, loss_att=53.750, acc=0.713, loss=59.720, backward_time=1.049, grad_norm=115.411, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.574e-05, train_time=3.005 +[gpub002:0/64] 2023-07-13 00:53:44,309 (trainer:732) INFO: 42epoch:train:2401-2500batch: iter_time=1.099e-04, forward_time=0.146, loss_ctc=70.966, loss_att=49.355, acc=0.718, loss=55.839, backward_time=1.028, grad_norm=106.942, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.573e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 00:53:46,681 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 00:54:04,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:54:08,345 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:54:08,345 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 00:54:08,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:01:39,737 (trainer:732) INFO: 42epoch:train:2501-2600batch: iter_time=1.820, forward_time=0.145, loss_ctc=69.815, loss_att=48.599, acc=0.726, loss=54.963, backward_time=1.041, grad_norm=140.834, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.573e-05, train_time=9.508 +[gpub002:0/64] 2023-07-13 01:03:56,212 (trainer:732) INFO: 42epoch:train:2601-2700batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=67.653, loss_att=52.431, acc=0.712, loss=56.997, backward_time=1.031, grad_norm=109.062, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.572e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 01:06:11,899 (trainer:732) INFO: 42epoch:train:2701-2800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=59.663, loss_att=44.027, acc=0.729, loss=48.718, backward_time=1.028, grad_norm=114.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.571e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 01:08:27,521 (trainer:732) INFO: 42epoch:train:2801-2900batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=71.051, loss_att=47.872, acc=0.736, loss=54.826, backward_time=1.028, grad_norm=161.477, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.570e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 01:10:43,390 (trainer:732) INFO: 42epoch:train:2901-3000batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=80.680, loss_att=59.876, acc=0.698, loss=66.117, backward_time=1.029, grad_norm=125.354, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.570e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 01:12:59,481 (trainer:732) INFO: 42epoch:train:3001-3100batch: iter_time=1.471e-04, forward_time=0.146, 
loss_ctc=74.273, loss_att=57.896, acc=0.724, loss=62.809, backward_time=1.030, grad_norm=158.704, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.569e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 01:15:15,668 (trainer:732) INFO: 42epoch:train:3101-3200batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=78.031, loss_att=59.327, acc=0.704, loss=64.938, backward_time=1.030, grad_norm=153.548, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.568e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 01:17:36,473 (trainer:732) INFO: 42epoch:train:3201-3300batch: iter_time=1.264e-04, forward_time=0.145, loss_ctc=73.496, loss_att=52.683, acc=0.721, loss=58.927, backward_time=1.028, grad_norm=135.257, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.568e-05, train_time=2.816 +[gpub002:0/64] 2023-07-13 01:18:26,019 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 01:18:44,507 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 01:18:47,930 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 01:18:47,930 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 01:18:47,937 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:25:11,697 (trainer:732) INFO: 42epoch:train:3301-3400batch: iter_time=1.638, forward_time=0.194, loss_ctc=72.538, loss_att=53.392, acc=0.723, loss=59.136, backward_time=1.050, grad_norm=121.024, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=5.567e-05, train_time=9.103 +[gpub002:0/64] 2023-07-13 01:27:27,626 (trainer:732) INFO: 42epoch:train:3401-3500batch: iter_time=1.464e-04, forward_time=0.146, loss_ctc=67.316, loss_att=49.889, acc=0.703, loss=55.117, backward_time=1.029, grad_norm=129.513, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.566e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 01:29:43,626 (trainer:732) INFO: 42epoch:train:3501-3600batch: iter_time=1.305e-04, forward_time=0.146, loss_ctc=61.606, loss_att=46.689, acc=0.718, loss=51.164, backward_time=1.029, grad_norm=108.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.566e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 01:31:59,663 (trainer:732) INFO: 42epoch:train:3601-3700batch: iter_time=1.609e-04, forward_time=0.146, loss_ctc=67.195, loss_att=48.227, acc=0.733, loss=53.917, backward_time=1.029, grad_norm=126.501, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.565e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 01:34:15,629 (trainer:732) INFO: 42epoch:train:3701-3800batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=76.534, loss_att=56.304, acc=0.712, loss=62.373, backward_time=1.030, grad_norm=117.496, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.564e-05, train_time=2.719 
+[gpub002:0/64] 2023-07-13 01:36:31,874 (trainer:732) INFO: 42epoch:train:3801-3900batch: iter_time=1.527e-04, forward_time=0.147, loss_ctc=79.688, loss_att=61.818, acc=0.707, loss=67.179, backward_time=1.031, grad_norm=127.586, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.564e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 01:38:47,365 (trainer:732) INFO: 42epoch:train:3901-4000batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=73.126, loss_att=53.845, acc=0.710, loss=59.629, backward_time=1.026, grad_norm=113.224, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.563e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 01:41:03,491 (trainer:732) INFO: 42epoch:train:4001-4100batch: iter_time=1.410e-04, forward_time=0.147, loss_ctc=73.877, loss_att=58.523, acc=0.696, loss=63.129, backward_time=1.030, grad_norm=130.725, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.562e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 01:42:49,251 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 01:43:07,433 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 01:43:10,890 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 01:43:10,891 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 01:43:10,897 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:48:33,517 (trainer:732) INFO: 42epoch:train:4101-4200batch: iter_time=2.988, forward_time=0.147, loss_ctc=74.164, loss_att=53.456, acc=0.719, loss=59.668, backward_time=1.039, grad_norm=115.908, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.561e-05, train_time=9.000 +[gpub002:0/64] 2023-07-13 01:50:50,772 (trainer:732) INFO: 42epoch:train:4201-4300batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=64.361, loss_att=46.601, acc=0.711, loss=51.929, backward_time=1.033, grad_norm=103.878, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.561e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 01:53:07,508 (trainer:732) INFO: 42epoch:train:4301-4400batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=61.857, loss_att=46.652, acc=0.722, loss=51.213, backward_time=1.030, grad_norm=101.562, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.560e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 01:55:23,236 (trainer:732) INFO: 42epoch:train:4401-4500batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=69.241, loss_att=48.461, acc=0.741, loss=54.695, backward_time=1.028, grad_norm=128.802, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.559e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 01:57:42,177 (trainer:732) INFO: 42epoch:train:4501-4600batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=80.204, loss_att=58.270, acc=0.708, loss=64.850, 
backward_time=1.034, grad_norm=105.180, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.559e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 01:59:58,237 (trainer:732) INFO: 42epoch:train:4601-4700batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=76.178, loss_att=61.031, acc=0.708, loss=65.575, backward_time=1.030, grad_norm=119.146, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.558e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 02:02:14,580 (trainer:732) INFO: 42epoch:train:4701-4800batch: iter_time=1.163e-04, forward_time=0.147, loss_ctc=75.844, loss_att=58.430, acc=0.715, loss=63.654, backward_time=1.034, grad_norm=126.411, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.557e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 02:04:30,380 (trainer:732) INFO: 42epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=73.955, loss_att=54.425, acc=0.710, loss=60.284, backward_time=1.028, grad_norm=110.702, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.557e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 02:06:46,081 (trainer:732) INFO: 42epoch:train:4901-5000batch: iter_time=1.107e-04, forward_time=0.146, loss_ctc=71.515, loss_att=48.756, acc=0.724, loss=55.584, backward_time=1.029, grad_norm=124.882, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.556e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:06:47,710 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 02:07:06,009 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:07:09,425 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:07:09,425 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 02:07:09,432 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 02:14:16,140 (trainer:732) INFO: 42epoch:train:5001-5100batch: iter_time=1.636, forward_time=0.145, loss_ctc=69.401, loss_att=50.532, acc=0.709, loss=56.193, backward_time=1.040, grad_norm=142.315, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.555e-05, train_time=9.001 +[gpub002:0/64] 2023-07-13 02:16:32,200 (trainer:732) INFO: 42epoch:train:5101-5200batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=65.757, loss_att=49.932, acc=0.709, loss=54.679, backward_time=1.028, grad_norm=114.490, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.555e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 02:18:52,290 (trainer:732) INFO: 42epoch:train:5201-5300batch: iter_time=1.255e-04, forward_time=0.144, loss_ctc=64.010, loss_att=44.270, acc=0.738, loss=50.192, backward_time=1.032, grad_norm=105.407, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.554e-05, train_time=2.802 +[gpub002:0/64] 2023-07-13 02:21:08,766 (trainer:732) INFO: 
42epoch:train:5301-5400batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=70.481, loss_att=53.258, acc=0.712, loss=58.425, backward_time=1.033, grad_norm=117.124, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.553e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 02:23:24,679 (trainer:732) INFO: 42epoch:train:5401-5500batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=79.697, loss_att=60.747, acc=0.706, loss=66.432, backward_time=1.030, grad_norm=129.480, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.553e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 02:25:41,523 (trainer:732) INFO: 42epoch:train:5501-5600batch: iter_time=1.228e-04, forward_time=0.149, loss_ctc=72.910, loss_att=55.688, acc=0.712, loss=60.854, backward_time=1.030, grad_norm=149.499, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=5.552e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 02:27:57,260 (trainer:732) INFO: 42epoch:train:5601-5700batch: iter_time=1.284e-04, forward_time=0.145, loss_ctc=77.018, loss_att=59.105, acc=0.700, loss=64.479, backward_time=1.028, grad_norm=131.998, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.551e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:30:13,284 (trainer:732) INFO: 42epoch:train:5701-5800batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=73.572, loss_att=53.577, acc=0.722, loss=59.576, backward_time=1.030, grad_norm=103.209, clip=100.000, loss_scale=1.201e+32, optim_step_time=0.182, optim0_lr0=5.551e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 02:31:01,533 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 02:31:19,805 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:31:23,527 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:31:23,527 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 02:31:23,533 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 02:38:09,529 (trainer:732) INFO: 42epoch:train:5801-5900batch: iter_time=1.672, forward_time=0.223, loss_ctc=66.753, loss_att=45.931, acc=0.710, loss=52.177, backward_time=1.044, grad_norm=115.552, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.550e-05, train_time=9.524 +[gpub002:0/64] 2023-07-13 02:40:27,216 (trainer:732) INFO: 42epoch:train:5901-6000batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=66.955, loss_att=51.180, acc=0.714, loss=55.913, backward_time=1.032, grad_norm=103.524, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.549e-05, train_time=2.754 +[gpub002:0/64] 2023-07-13 02:42:42,739 (trainer:732) INFO: 42epoch:train:6001-6100batch: iter_time=1.289e-04, forward_time=0.144, loss_ctc=62.586, loss_att=45.365, acc=0.728, loss=50.531, backward_time=1.026, grad_norm=94.871, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.548e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 02:44:58,673 (trainer:732) INFO: 42epoch:train:6101-6200batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=70.502, loss_att=52.029, acc=0.726, loss=57.571, backward_time=1.029, grad_norm=120.129, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.548e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 02:47:14,390 (trainer:732) INFO: 42epoch:train:6201-6300batch: iter_time=1.266e-04, forward_time=0.144, loss_ctc=80.294, loss_att=60.866, acc=0.691, loss=66.694, backward_time=1.027, grad_norm=141.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.547e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:49:29,945 (trainer:732) INFO: 42epoch:train:6301-6400batch: iter_time=1.154e-04, forward_time=0.144, loss_ctc=74.026, loss_att=55.778, acc=0.718, loss=61.252, backward_time=1.026, grad_norm=131.830, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.546e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 02:51:45,921 (trainer:732) INFO: 42epoch:train:6401-6500batch: iter_time=1.322e-04, forward_time=0.145, loss_ctc=77.487, loss_att=57.948, acc=0.707, loss=63.810, backward_time=1.028, grad_norm=135.680, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.546e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 02:54:01,435 (trainer:732) INFO: 42epoch:train:6501-6600batch: iter_time=1.228e-04, forward_time=0.144, loss_ctc=69.023, loss_att=50.317, acc=0.715, loss=55.929, backward_time=1.026, grad_norm=121.503, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.545e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 02:55:35,752 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-13 02:55:53,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:55:57,465 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:55:57,465 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 02:55:57,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 03:02:27,111 (trainer:732) INFO: 42epoch:train:6601-6700batch: iter_time=3.598, forward_time=0.208, loss_ctc=68.057, loss_att=47.036, acc=0.718, loss=53.342, backward_time=1.042, grad_norm=118.857, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.544e-05, train_time=10.113 +[gpub002:0/64] 2023-07-13 03:04:44,821 (trainer:732) INFO: 42epoch:train:6701-6800batch: iter_time=1.149e-04, forward_time=0.148, loss_ctc=68.438, loss_att=52.085, acc=0.723, loss=56.991, backward_time=1.034, grad_norm=140.459, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.544e-05, train_time=2.754 +[gpub002:0/64] 2023-07-13 03:07:01,317 (trainer:732) INFO: 42epoch:train:6801-6900batch: iter_time=1.279e-04, forward_time=0.145, loss_ctc=64.735, loss_att=49.425, acc=0.714, loss=54.018, backward_time=1.027, grad_norm=114.945, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.543e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 03:09:19,001 (trainer:732) INFO: 42epoch:train:6901-7000batch: iter_time=0.004, forward_time=0.147, loss_ctc=64.172, loss_att=44.513, acc=0.739, loss=50.411, backward_time=1.035, grad_norm=108.401, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.542e-05, train_time=2.753 +[gpub002:0/64] 2023-07-13 03:11:34,913 (trainer:732) INFO: 42epoch:train:7001-7100batch: iter_time=0.002, forward_time=0.145, loss_ctc=70.024, loss_att=52.180, acc=0.717, loss=57.534, backward_time=1.028, grad_norm=133.620, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.542e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 03:14:01,730 (trainer:732) INFO: 42epoch:train:7101-7200batch: iter_time=1.283e-04, forward_time=0.217, loss_ctc=80.096, loss_att=59.082, acc=0.709, loss=65.386, backward_time=1.048, grad_norm=121.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=5.541e-05, train_time=2.936 +[gpub002:0/64] 2023-07-13 03:16:18,403 (trainer:732) INFO: 42epoch:train:7201-7300batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=72.737, loss_att=55.530, acc=0.720, loss=60.692, backward_time=1.031, grad_norm=126.104, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.540e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 03:18:34,604 (trainer:732) INFO: 42epoch:train:7301-7400batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=76.887, loss_att=59.261, acc=0.713, loss=64.549, backward_time=1.028, grad_norm=153.123, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.540e-05, 
train_time=2.724 +[gpub002:0/64] 2023-07-13 03:20:50,723 (trainer:732) INFO: 42epoch:train:7401-7500batch: iter_time=1.391e-04, forward_time=0.147, loss_ctc=74.305, loss_att=53.789, acc=0.728, loss=59.944, backward_time=1.028, grad_norm=120.151, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.539e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 03:21:11,975 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-13 03:21:30,453 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 03:21:33,941 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 03:21:33,941 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 03:21:33,947 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 03:28:50,459 (trainer:732) INFO: 42epoch:train:7501-7600batch: iter_time=3.260, forward_time=0.146, loss_ctc=68.511, loss_att=47.602, acc=0.728, loss=53.875, backward_time=1.046, grad_norm=119.300, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.538e-05, train_time=9.595 +[gpub002:0/64] 2023-07-13 03:31:08,080 (trainer:732) INFO: 42epoch:train:7601-7700batch: iter_time=1.319e-04, forward_time=0.145, loss_ctc=65.544, loss_att=50.965, acc=0.711, loss=55.339, backward_time=1.032, grad_norm=117.013, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.538e-05, train_time=2.752 +[gpub002:0/64] 2023-07-13 03:33:24,531 (trainer:732) INFO: 42epoch:train:7701-7800batch: iter_time=1.504e-04, forward_time=0.147, loss_ctc=59.954, loss_att=44.149, acc=0.726, loss=48.890, backward_time=1.030, grad_norm=111.944, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.537e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 03:35:39,885 (trainer:732) INFO: 42epoch:train:7801-7900batch: iter_time=1.352e-04, forward_time=0.144, loss_ctc=71.306, loss_att=50.202, acc=0.729, loss=56.534, backward_time=1.025, grad_norm=132.190, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.536e-05, train_time=2.707 +[gpub002:0/64] 2023-07-13 03:38:11,937 (trainer:732) INFO: 42epoch:train:7901-8000batch: iter_time=1.097e-04, forward_time=0.144, loss_ctc=81.280, loss_att=59.731, acc=0.695, loss=66.195, backward_time=1.044, grad_norm=116.322, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.536e-05, train_time=3.041 +[gpub002:0/64] 2023-07-13 03:40:27,681 (trainer:732) INFO: 42epoch:train:8001-8100batch: iter_time=1.301e-04, forward_time=0.144, loss_ctc=73.657, loss_att=57.378, acc=0.722, loss=62.262, backward_time=1.028, grad_norm=119.137, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.535e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 03:42:43,340 (trainer:732) INFO: 42epoch:train:8101-8200batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=77.258, loss_att=59.529, acc=0.697, 
loss=64.848, backward_time=1.029, grad_norm=124.385, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.534e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 03:44:58,939 (trainer:732) INFO: 42epoch:train:8201-8300batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=70.039, loss_att=50.529, acc=0.722, loss=56.382, backward_time=1.028, grad_norm=114.947, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.533e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 03:45:46,940 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 03:46:05,424 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 03:46:08,900 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 03:46:08,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 03:46:08,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 03:53:07,279 (trainer:732) INFO: 42epoch:train:8301-8400batch: iter_time=1.680, forward_time=0.177, loss_ctc=70.901, loss_att=52.528, acc=0.721, loss=58.040, backward_time=1.041, grad_norm=127.142, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.533e-05, train_time=9.767 +[gpub002:0/64] 2023-07-13 03:55:25,674 (trainer:732) INFO: 42epoch:train:8401-8500batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=63.924, loss_att=47.108, acc=0.710, loss=52.153, backward_time=1.037, grad_norm=118.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.532e-05, train_time=2.768 +[gpub002:0/64] 2023-07-13 03:57:42,100 (trainer:732) INFO: 42epoch:train:8501-8600batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=63.131, loss_att=46.587, acc=0.723, loss=51.551, backward_time=1.029, grad_norm=92.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.531e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 04:00:13,714 (trainer:732) INFO: 42epoch:train:8601-8700batch: iter_time=1.103e-04, forward_time=0.144, loss_ctc=68.642, loss_att=48.513, acc=0.737, loss=54.552, backward_time=1.043, grad_norm=114.432, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.531e-05, train_time=3.032 +[gpub002:0/64] 2023-07-13 04:02:29,944 (trainer:732) INFO: 42epoch:train:8701-8800batch: iter_time=1.279e-04, forward_time=0.144, loss_ctc=75.843, loss_att=56.720, acc=0.713, loss=62.457, backward_time=1.028, grad_norm=108.012, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.530e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 04:04:45,889 (trainer:732) INFO: 42epoch:train:8801-8900batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=77.727, loss_att=61.456, acc=0.711, loss=66.338, backward_time=1.029, grad_norm=118.766, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.529e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 04:07:01,510 
(trainer:732) INFO: 42epoch:train:8901-9000batch: iter_time=1.245e-04, forward_time=0.144, loss_ctc=72.948, loss_att=53.187, acc=0.715, loss=59.115, backward_time=1.027, grad_norm=137.792, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.529e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 04:09:17,733 (trainer:732) INFO: 42epoch:train:9001-9100batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=75.533, loss_att=57.845, acc=0.699, loss=63.151, backward_time=1.031, grad_norm=119.867, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.528e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 04:10:50,524 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 04:11:08,648 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 04:11:12,048 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 04:11:12,048 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 04:11:12,055 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 04:16:46,199 (trainer:732) INFO: 42epoch:train:9101-9200batch: iter_time=1.637, forward_time=0.145, loss_ctc=71.833, loss_att=48.627, acc=0.732, loss=55.589, backward_time=1.042, grad_norm=104.157, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.527e-05, train_time=8.969 +[gpub002:0/64] 2023-07-13 04:19:03,487 (trainer:732) INFO: 42epoch:train:9201-9300batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=67.367, loss_att=51.468, acc=0.722, loss=56.238, backward_time=1.032, grad_norm=120.743, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.527e-05, train_time=2.746 +[gpub002:0/64] 2023-07-13 04:21:20,258 (trainer:732) INFO: 42epoch:train:9301-9400batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=65.408, loss_att=50.219, acc=0.717, loss=54.776, backward_time=1.030, grad_norm=110.375, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.526e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 04:23:36,660 (trainer:732) INFO: 42epoch:train:9401-9500batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=63.769, loss_att=43.754, acc=0.739, loss=49.759, backward_time=1.025, grad_norm=107.213, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.525e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 04:25:52,274 (trainer:732) INFO: 42epoch:train:9501-9600batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=70.289, loss_att=52.121, acc=0.718, loss=57.571, backward_time=1.025, grad_norm=122.835, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.525e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 04:28:08,506 (trainer:732) INFO: 42epoch:train:9601-9700batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=81.429, loss_att=62.500, acc=0.706, loss=68.178, backward_time=1.028, grad_norm=125.597, 
clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.524e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 04:30:24,543 (trainer:732) INFO: 42epoch:train:9701-9800batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=71.177, loss_att=53.266, acc=0.725, loss=58.639, backward_time=1.027, grad_norm=126.374, clip=100.000, loss_scale=2.401e+32, optim_step_time=0.181, optim0_lr0=5.523e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 04:32:40,550 (trainer:732) INFO: 42epoch:train:9801-9900batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=75.760, loss_att=57.853, acc=0.719, loss=63.225, backward_time=1.030, grad_norm=129.317, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.523e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 04:34:56,397 (trainer:732) INFO: 42epoch:train:9901-10000batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=73.059, loss_att=52.842, acc=0.731, loss=58.907, backward_time=1.028, grad_norm=109.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.522e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 04:50:54,355 (trainer:338) INFO: 42epoch results: [train] iter_time=0.354, forward_time=0.196, loss_ctc=71.674, loss_att=53.129, acc=0.716, loss=58.693, backward_time=1.042, grad_norm=122.970, clip=100.000, loss_scale=1.303e+32, optim_step_time=0.185, optim0_lr0=5.556e-05, train_time=3.797, time=5 hours, 16 minutes and 39.53 seconds, total_count=390000, gpu_max_cached_mem_GB=37.572, [valid] loss_ctc=44.090, cer_ctc=0.260, loss_att=36.484, acc=0.697, cer=0.342, wer=0.989, loss=38.766, time=6 minutes and 47.34 seconds, total_count=39974, gpu_max_cached_mem_GB=37.572, [att_plot] time=8 minutes and 56.53 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-13 04:51:13,901 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub002:0/64] 2023-07-13 04:51:13,945 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/31epoch.pth +[gpub002:0/64] 2023-07-13 04:51:14,020 (trainer:272) INFO: 43/50epoch started. Estimated time to finish: 1 day, 17 hours and 23 minutes +[gpub002:0/64] 2023-07-13 04:51:15,288 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 04:51:35,527 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 04:51:39,096 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 04:51:39,097 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 04:51:39,159 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 04:58:53,737 (trainer:732) INFO: 43epoch:train:1-100batch: iter_time=3.165, forward_time=0.177, loss_ctc=67.171, loss_att=52.136, acc=0.696, loss=56.646, backward_time=1.043, grad_norm=125.404, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.521e-05, train_time=9.181 +[gpub002:0/64] 2023-07-13 05:01:09,947 (trainer:732) INFO: 43epoch:train:101-200batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=74.299, loss_att=53.346, acc=0.702, loss=59.632, backward_time=1.031, grad_norm=143.592, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.521e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 05:03:26,486 (trainer:732) INFO: 43epoch:train:201-300batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=72.274, loss_att=51.180, acc=0.712, loss=57.508, backward_time=1.030, grad_norm=127.012, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.520e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 05:05:44,653 (trainer:732) INFO: 43epoch:train:301-400batch: iter_time=1.104e-04, forward_time=0.149, loss_ctc=74.766, loss_att=54.066, acc=0.696, loss=60.276, backward_time=1.031, grad_norm=130.116, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.519e-05, train_time=2.763 +[gpub002:0/64] 2023-07-13 05:08:01,530 (trainer:732) INFO: 43epoch:train:401-500batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.776, loss_att=53.497, acc=0.706, loss=58.381, backward_time=1.029, grad_norm=113.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.519e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 05:10:24,010 (trainer:732) INFO: 43epoch:train:501-600batch: iter_time=3.155e-04, forward_time=0.146, loss_ctc=78.761, loss_att=55.515, acc=0.699, loss=62.489, backward_time=1.032, grad_norm=125.079, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.518e-05, train_time=2.849 +[gpub002:0/64] 2023-07-13 05:12:54,234 (trainer:732) INFO: 43epoch:train:601-700batch: iter_time=1.062e-04, forward_time=0.144, loss_ctc=71.370, loss_att=43.750, acc=0.721, loss=52.036, backward_time=1.053, grad_norm=126.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.517e-05, train_time=3.004 +[gpub002:0/64] 2023-07-13 05:15:26,303 (trainer:732) INFO: 43epoch:train:701-800batch: iter_time=5.144e-04, forward_time=0.179, loss_ctc=66.772, loss_att=49.511, acc=0.712, loss=54.690, backward_time=1.041, grad_norm=121.595, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.517e-05, 
train_time=3.041 +[gpub002:0/64] 2023-07-13 05:16:19,570 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 05:16:37,303 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 05:16:40,677 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 05:16:40,677 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 05:16:40,684 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 05:23:58,859 (trainer:732) INFO: 43epoch:train:801-900batch: iter_time=1.808, forward_time=0.186, loss_ctc=68.942, loss_att=51.380, acc=0.705, loss=56.649, backward_time=1.041, grad_norm=122.084, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.516e-05, train_time=10.251 +[gpub002:0/64] 2023-07-13 05:26:15,903 (trainer:732) INFO: 43epoch:train:901-1000batch: iter_time=1.277e-04, forward_time=0.148, loss_ctc=67.154, loss_att=51.145, acc=0.718, loss=55.947, backward_time=1.034, grad_norm=100.494, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.515e-05, train_time=2.741 +[gpub002:0/64] 2023-07-13 05:28:31,874 (trainer:732) INFO: 43epoch:train:1001-1100batch: iter_time=1.304e-04, forward_time=0.145, loss_ctc=74.706, loss_att=53.692, acc=0.714, loss=59.997, backward_time=1.028, grad_norm=134.309, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.515e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 05:30:50,785 (trainer:732) INFO: 43epoch:train:1101-1200batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=70.458, loss_att=49.738, acc=0.704, loss=55.954, backward_time=1.031, grad_norm=126.190, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.514e-05, train_time=2.778 +[gpub002:0/64] 2023-07-13 05:33:07,145 (trainer:732) INFO: 43epoch:train:1201-1300batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=76.640, loss_att=58.441, acc=0.723, loss=63.900, backward_time=1.030, grad_norm=120.803, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.513e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 05:35:42,951 (trainer:732) INFO: 43epoch:train:1301-1400batch: iter_time=0.003, forward_time=0.281, loss_ctc=63.792, loss_att=48.848, acc=0.704, loss=53.331, backward_time=1.064, grad_norm=120.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.193, optim0_lr0=5.513e-05, train_time=3.115 +[gpub002:0/64] 2023-07-13 05:37:59,663 (trainer:732) INFO: 43epoch:train:1401-1500batch: iter_time=1.387e-04, forward_time=0.147, loss_ctc=73.107, loss_att=46.764, acc=0.723, loss=54.667, backward_time=1.030, grad_norm=135.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.512e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 05:40:17,904 (trainer:732) INFO: 43epoch:train:1501-1600batch: iter_time=1.201e-04, forward_time=0.147, loss_ctc=66.824, loss_att=47.833, acc=0.731, loss=53.530, 
backward_time=1.030, grad_norm=106.942, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.511e-05, train_time=2.765 +[gpub002:0/64] 2023-07-13 05:41:55,175 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 05:42:13,121 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 05:42:16,546 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 05:42:16,546 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 05:42:16,552 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 05:46:42,778 (trainer:732) INFO: 43epoch:train:1601-1700batch: iter_time=2.035, forward_time=0.145, loss_ctc=64.829, loss_att=45.468, acc=0.725, loss=51.276, backward_time=1.045, grad_norm=109.907, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.511e-05, train_time=7.697 +[gpub002:0/64] 2023-07-13 05:49:04,557 (trainer:732) INFO: 43epoch:train:1701-1800batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=68.037, loss_att=51.317, acc=0.717, loss=56.333, backward_time=1.039, grad_norm=110.998, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.510e-05, train_time=2.835 +[gpub002:0/64] 2023-07-13 05:51:20,869 (trainer:732) INFO: 43epoch:train:1801-1900batch: iter_time=1.132e-04, forward_time=0.147, loss_ctc=78.940, loss_att=57.649, acc=0.709, loss=64.036, backward_time=1.028, grad_norm=127.683, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.509e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 05:53:36,717 (trainer:732) INFO: 43epoch:train:1901-2000batch: iter_time=1.120e-04, forward_time=0.146, loss_ctc=62.792, loss_att=44.461, acc=0.716, loss=49.960, backward_time=1.025, grad_norm=98.542, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.509e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 05:55:52,706 (trainer:732) INFO: 43epoch:train:2001-2100batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=81.040, loss_att=61.512, acc=0.713, loss=67.371, backward_time=1.029, grad_norm=130.485, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.508e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 05:58:11,248 (trainer:732) INFO: 43epoch:train:2101-2200batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=64.820, loss_att=50.416, acc=0.711, loss=54.738, backward_time=1.030, grad_norm=111.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.507e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 06:00:26,904 (trainer:732) INFO: 43epoch:train:2201-2300batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=69.248, loss_att=47.166, acc=0.714, loss=53.791, backward_time=1.026, grad_norm=117.367, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.507e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 06:02:46,061 (trainer:732) INFO: 
43epoch:train:2301-2400batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=65.678, loss_att=43.020, acc=0.730, loss=49.818, backward_time=1.039, grad_norm=117.099, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.506e-05, train_time=2.783 +[gpub002:0/64] 2023-07-13 06:05:02,779 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 06:05:21,017 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 06:05:24,530 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 06:05:24,530 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 06:05:24,536 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 06:11:05,880 (trainer:732) INFO: 43epoch:train:2401-2500batch: iter_time=1.277, forward_time=0.147, loss_ctc=70.028, loss_att=50.589, acc=0.734, loss=56.420, backward_time=1.041, grad_norm=117.428, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.505e-05, train_time=9.996 +[gpub002:0/64] 2023-07-13 06:13:45,256 (trainer:732) INFO: 43epoch:train:2501-2600batch: iter_time=1.182e-04, forward_time=0.146, loss_ctc=65.700, loss_att=49.932, acc=0.707, loss=54.662, backward_time=1.045, grad_norm=108.564, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.505e-05, train_time=3.187 +[gpub002:0/64] 2023-07-13 06:16:01,175 (trainer:732) INFO: 43epoch:train:2601-2700batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=69.006, loss_att=51.406, acc=0.708, loss=56.686, backward_time=1.029, grad_norm=117.685, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.504e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 06:18:16,928 (trainer:732) INFO: 43epoch:train:2701-2800batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=68.756, loss_att=49.034, acc=0.720, loss=54.951, backward_time=1.026, grad_norm=113.801, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.503e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 06:20:32,341 (trainer:732) INFO: 43epoch:train:2801-2900batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=74.163, loss_att=54.309, acc=0.702, loss=60.265, backward_time=1.024, grad_norm=120.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.503e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 06:22:47,910 (trainer:732) INFO: 43epoch:train:2901-3000batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=68.902, loss_att=52.706, acc=0.711, loss=57.565, backward_time=1.027, grad_norm=117.962, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.502e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 06:25:04,075 (trainer:732) INFO: 43epoch:train:3001-3100batch: iter_time=1.264e-04, forward_time=0.148, loss_ctc=72.330, loss_att=53.634, acc=0.706, loss=59.243, backward_time=1.029, grad_norm=126.462, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.501e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 06:27:19,562 (trainer:732) INFO: 43epoch:train:3101-3200batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=64.870, loss_att=42.763, acc=0.727, loss=49.396, backward_time=1.026, grad_norm=109.345, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.501e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 06:29:35,085 (trainer:732) INFO: 43epoch:train:3201-3300batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=66.320, loss_att=48.097, acc=0.724, loss=53.564, backward_time=1.026, grad_norm=116.268, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.500e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 06:30:19,781 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 06:30:37,996 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 06:30:41,419 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 06:30:41,419 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 06:30:41,425 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 06:36:35,849 (trainer:732) INFO: 43epoch:train:3301-3400batch: iter_time=1.288, forward_time=0.145, loss_ctc=70.209, loss_att=56.960, acc=0.702, loss=60.934, backward_time=1.040, grad_norm=127.952, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.499e-05, train_time=8.415 +[gpub002:0/64] 2023-07-13 06:38:52,517 (trainer:732) INFO: 43epoch:train:3401-3500batch: iter_time=1.077e-04, forward_time=0.147, loss_ctc=70.143, loss_att=50.603, acc=0.720, loss=56.465, backward_time=1.032, grad_norm=141.257, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.499e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 06:41:08,598 (trainer:732) INFO: 43epoch:train:3501-3600batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=65.845, loss_att=47.427, acc=0.720, loss=52.953, backward_time=1.031, grad_norm=125.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.498e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 06:43:24,520 (trainer:732) INFO: 43epoch:train:3601-3700batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=77.247, loss_att=55.054, acc=0.710, loss=61.712, backward_time=1.028, grad_norm=118.561, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.497e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 06:45:40,581 (trainer:732) INFO: 43epoch:train:3701-3800batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=65.852, loss_att=50.169, acc=0.722, loss=54.874, backward_time=1.031, grad_norm=106.865, clip=100.000, loss_scale=4.803e+32, optim_step_time=0.182, optim0_lr0=5.497e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 06:47:59,361 (trainer:732) INFO: 43epoch:train:3801-3900batch: iter_time=1.074e-04, 
forward_time=0.148, loss_ctc=72.644, loss_att=54.032, acc=0.710, loss=59.615, backward_time=1.031, grad_norm=136.386, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.496e-05, train_time=2.775 +[gpub002:0/64] 2023-07-13 06:50:15,237 (trainer:732) INFO: 43epoch:train:3901-4000batch: iter_time=1.063e-04, forward_time=0.147, loss_ctc=64.068, loss_att=40.961, acc=0.734, loss=47.893, backward_time=1.029, grad_norm=130.607, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.495e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 06:52:31,943 (trainer:732) INFO: 43epoch:train:4001-4100batch: iter_time=1.034e-04, forward_time=0.147, loss_ctc=65.865, loss_att=48.470, acc=0.723, loss=53.688, backward_time=1.030, grad_norm=141.896, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.495e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 06:54:15,645 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 06:54:33,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 06:54:37,089 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 06:54:37,089 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 06:54:37,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 06:58:54,596 (trainer:732) INFO: 43epoch:train:4101-4200batch: iter_time=1.344, forward_time=0.226, loss_ctc=68.341, loss_att=49.394, acc=0.720, loss=55.078, backward_time=1.069, grad_norm=135.878, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=5.494e-05, train_time=7.653 +[gpub002:0/64] 2023-07-13 07:01:12,004 (trainer:732) INFO: 43epoch:train:4201-4300batch: iter_time=1.012e-04, forward_time=0.148, loss_ctc=68.518, loss_att=52.143, acc=0.714, loss=57.055, backward_time=1.031, grad_norm=164.714, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.493e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 07:03:28,068 (trainer:732) INFO: 43epoch:train:4301-4400batch: iter_time=1.019e-04, forward_time=0.146, loss_ctc=78.283, loss_att=56.097, acc=0.717, loss=62.753, backward_time=1.031, grad_norm=131.835, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.493e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 07:05:44,046 (trainer:732) INFO: 43epoch:train:4401-4500batch: iter_time=1.003e-04, forward_time=0.146, loss_ctc=61.875, loss_att=43.498, acc=0.722, loss=49.011, backward_time=1.030, grad_norm=101.796, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.492e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 07:08:00,105 (trainer:732) INFO: 43epoch:train:4501-4600batch: iter_time=9.596e-05, forward_time=0.145, loss_ctc=80.642, loss_att=61.038, acc=0.717, loss=66.919, backward_time=1.029, grad_norm=116.023, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.491e-05, 
train_time=2.721 +[gpub002:0/64] 2023-07-13 07:10:39,483 (trainer:732) INFO: 43epoch:train:4601-4700batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=62.748, loss_att=49.774, acc=0.715, loss=53.666, backward_time=1.071, grad_norm=110.121, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.491e-05, train_time=3.187 +[gpub002:0/64] 2023-07-13 07:12:58,455 (trainer:732) INFO: 43epoch:train:4701-4800batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=69.387, loss_att=46.901, acc=0.713, loss=53.646, backward_time=1.043, grad_norm=122.959, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.490e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 07:15:14,699 (trainer:732) INFO: 43epoch:train:4801-4900batch: iter_time=1.047e-04, forward_time=0.146, loss_ctc=63.195, loss_att=42.368, acc=0.732, loss=48.616, backward_time=1.029, grad_norm=110.403, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.489e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 07:16:46,779 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 07:17:31,636 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 07:17:49,916 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 07:17:53,361 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 07:17:53,361 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 07:17:53,368 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 07:21:12,580 (trainer:732) INFO: 43epoch:train:4901-5000batch: iter_time=1.363, forward_time=0.145, loss_ctc=69.506, loss_att=50.192, acc=0.737, loss=55.987, backward_time=1.033, grad_norm=111.488, clip=100.000, loss_scale=5.431e+32, optim_step_time=0.182, optim0_lr0=5.489e-05, train_time=7.157 +[gpub002:0/64] 2023-07-13 07:23:30,148 (trainer:732) INFO: 43epoch:train:5001-5100batch: iter_time=1.400e-04, forward_time=0.148, loss_ctc=65.881, loss_att=50.693, acc=0.708, loss=55.250, backward_time=1.037, grad_norm=122.810, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.488e-05, train_time=2.751 +[gpub002:0/64] 2023-07-13 07:25:46,716 (trainer:732) INFO: 43epoch:train:5101-5200batch: iter_time=1.426e-04, forward_time=0.147, loss_ctc=67.325, loss_att=50.408, acc=0.713, loss=55.483, backward_time=1.029, grad_norm=122.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.487e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 07:28:02,507 (trainer:732) INFO: 43epoch:train:5201-5300batch: iter_time=1.599e-04, forward_time=0.146, loss_ctc=69.909, loss_att=49.226, acc=0.722, loss=55.431, backward_time=1.029, grad_norm=113.819, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.487e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 07:28:23,967 (trainer:663) WARNING: The grad 
norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 07:30:18,027 (trainer:732) INFO: 43epoch:train:5301-5400batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=73.830, loss_att=53.727, acc=0.707, loss=59.758, backward_time=1.028, grad_norm=126.012, clip=100.000, loss_scale=1.854e+32, optim_step_time=0.182, optim0_lr0=5.486e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 07:32:33,556 (trainer:732) INFO: 43epoch:train:5401-5500batch: iter_time=1.358e-04, forward_time=0.145, loss_ctc=69.457, loss_att=53.006, acc=0.711, loss=57.941, backward_time=1.027, grad_norm=118.602, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.485e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 07:34:49,617 (trainer:732) INFO: 43epoch:train:5501-5600batch: iter_time=1.294e-04, forward_time=0.147, loss_ctc=70.674, loss_att=52.567, acc=0.708, loss=57.999, backward_time=1.029, grad_norm=124.421, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.485e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 07:37:05,125 (trainer:732) INFO: 43epoch:train:5601-5700batch: iter_time=1.538e-04, forward_time=0.146, loss_ctc=64.632, loss_att=42.627, acc=0.728, loss=49.228, backward_time=1.026, grad_norm=117.128, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.484e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 07:39:20,847 (trainer:732) INFO: 43epoch:train:5701-5800batch: iter_time=1.408e-04, forward_time=0.147, loss_ctc=65.046, loss_att=47.284, acc=0.726, loss=52.613, backward_time=1.028, grad_norm=118.675, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.483e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 07:40:20,191 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub002:0/64] 2023-07-13 07:40:38,345 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 07:40:41,837 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 07:40:41,837 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 07:40:41,843 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 07:46:21,327 (trainer:732) INFO: 43epoch:train:5801-5900batch: iter_time=2.635, forward_time=0.147, loss_ctc=69.559, loss_att=55.942, acc=0.705, loss=60.027, backward_time=1.049, grad_norm=111.034, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.483e-05, train_time=8.409 +[gpub002:0/64] 2023-07-13 07:48:37,870 (trainer:732) INFO: 43epoch:train:5901-6000batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=71.663, loss_att=52.278, acc=0.708, loss=58.094, backward_time=1.029, grad_norm=167.485, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.482e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 07:50:53,784 (trainer:732) INFO: 43epoch:train:6001-6100batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=65.614, loss_att=46.575, acc=0.717, loss=52.287, backward_time=1.029, grad_norm=119.203, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.481e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 07:53:27,460 (trainer:732) INFO: 43epoch:train:6101-6200batch: iter_time=1.129e-04, forward_time=0.144, loss_ctc=75.060, loss_att=54.210, acc=0.707, loss=60.465, backward_time=1.046, grad_norm=113.546, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.481e-05, train_time=3.073 +[gpub002:0/64] 2023-07-13 07:55:51,083 (trainer:732) INFO: 43epoch:train:6201-6300batch: iter_time=3.027e-04, forward_time=0.179, loss_ctc=66.883, loss_att=50.860, acc=0.711, loss=55.667, backward_time=1.034, grad_norm=113.637, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.480e-05, train_time=2.872 +[gpub002:0/64] 2023-07-13 07:58:09,272 (trainer:732) INFO: 43epoch:train:6301-6400batch: iter_time=1.044e-04, forward_time=0.163, loss_ctc=73.490, loss_att=54.979, acc=0.704, loss=60.532, backward_time=1.030, grad_norm=134.206, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.479e-05, train_time=2.764 +[gpub002:0/64] 2023-07-13 08:00:24,736 (trainer:732) INFO: 43epoch:train:6401-6500batch: iter_time=1.026e-04, forward_time=0.144, loss_ctc=65.033, loss_att=41.165, acc=0.730, loss=48.326, backward_time=1.025, grad_norm=125.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.479e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 08:02:40,579 (trainer:732) INFO: 43epoch:train:6501-6600batch: iter_time=1.147e-04, forward_time=0.144, loss_ctc=67.103, loss_att=47.658, acc=0.719, loss=53.491, backward_time=1.028, grad_norm=121.695, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.478e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 08:04:11,900 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-13 08:04:30,016 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 08:04:33,389 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 08:04:33,389 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 08:04:33,395 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 08:09:38,546 (trainer:732) INFO: 43epoch:train:6601-6700batch: iter_time=1.336, forward_time=0.186, loss_ctc=69.082, loss_att=51.109, acc=0.714, loss=56.501, backward_time=1.038, grad_norm=108.294, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.477e-05, train_time=8.359 +[gpub002:0/64] 2023-07-13 08:10:34,386 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 08:12:01,808 (trainer:732) INFO: 43epoch:train:6701-6800batch: iter_time=1.327e-04, forward_time=0.145, loss_ctc=67.704, loss_att=51.773, acc=0.711, loss=56.552, backward_time=1.037, grad_norm=130.635, clip=100.000, loss_scale=1.109e+32, optim_step_time=0.182, optim0_lr0=5.477e-05, train_time=2.865 +[gpub002:0/64] 2023-07-13 08:14:23,253 (trainer:732) INFO: 43epoch:train:6801-6900batch: iter_time=1.293e-04, forward_time=0.145, loss_ctc=77.182, loss_att=56.548, acc=0.705, loss=62.738, backward_time=1.042, grad_norm=118.840, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.476e-05, train_time=2.829 +[gpub002:0/64] 2023-07-13 08:16:48,767 (trainer:732) INFO: 43epoch:train:6901-7000batch: iter_time=1.451e-04, forward_time=0.145, loss_ctc=60.933, loss_att=43.482, acc=0.718, loss=48.717, backward_time=1.039, grad_norm=142.200, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.476e-05, train_time=2.910 +[gpub002:0/64] 2023-07-13 08:19:18,077 (trainer:732) INFO: 43epoch:train:7001-7100batch: iter_time=1.466e-04, forward_time=0.146, loss_ctc=80.225, loss_att=60.938, acc=0.714, loss=66.724, backward_time=1.043, grad_norm=128.693, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.475e-05, train_time=2.986 +[gpub002:0/64] 2023-07-13 08:21:41,636 (trainer:732) INFO: 43epoch:train:7101-7200batch: iter_time=1.517e-04, forward_time=0.146, loss_ctc=62.221, loss_att=49.382, acc=0.708, loss=53.234, backward_time=1.042, grad_norm=137.390, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.474e-05, train_time=2.871 +[gpub002:0/64] 2023-07-13 08:24:09,625 (trainer:732) INFO: 43epoch:train:7201-7300batch: iter_time=1.485e-04, forward_time=0.145, loss_ctc=69.377, loss_att=47.292, acc=0.709, loss=53.917, backward_time=1.058, grad_norm=123.920, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.474e-05, train_time=2.960 +[gpub002:0/64] 2023-07-13 08:26:25,316 
(trainer:732) INFO: 43epoch:train:7301-7400batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=62.734, loss_att=42.130, acc=0.731, loss=48.311, backward_time=1.028, grad_norm=112.463, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.473e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 08:28:42,136 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-13 08:29:00,185 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 08:29:03,836 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 08:29:03,836 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 08:29:03,842 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 08:31:55,060 (trainer:732) INFO: 43epoch:train:7401-7500batch: iter_time=1.410, forward_time=0.173, loss_ctc=70.908, loss_att=52.086, acc=0.730, loss=57.732, backward_time=1.031, grad_norm=103.567, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.472e-05, train_time=6.595 +[gpub002:0/64] 2023-07-13 08:34:14,214 (trainer:732) INFO: 43epoch:train:7501-7600batch: iter_time=1.180e-04, forward_time=0.147, loss_ctc=66.889, loss_att=51.014, acc=0.718, loss=55.777, backward_time=1.039, grad_norm=122.067, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.472e-05, train_time=2.783 +[gpub002:0/64] 2023-07-13 08:36:31,213 (trainer:732) INFO: 43epoch:train:7601-7700batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=76.393, loss_att=55.664, acc=0.714, loss=61.882, backward_time=1.029, grad_norm=138.860, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.471e-05, train_time=2.740 +[gpub002:0/64] 2023-07-13 08:38:47,398 (trainer:732) INFO: 43epoch:train:7701-7800batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=62.356, loss_att=44.020, acc=0.725, loss=49.521, backward_time=1.030, grad_norm=118.319, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.470e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 08:41:05,554 (trainer:732) INFO: 43epoch:train:7801-7900batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=79.362, loss_att=60.799, acc=0.719, loss=66.368, backward_time=1.031, grad_norm=129.499, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.470e-05, train_time=2.763 +[gpub002:0/64] 2023-07-13 08:43:32,372 (trainer:732) INFO: 43epoch:train:7901-8000batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=62.046, loss_att=49.851, acc=0.708, loss=53.509, backward_time=1.046, grad_norm=124.743, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.469e-05, train_time=2.936 +[gpub002:0/64] 2023-07-13 08:45:48,570 (trainer:732) INFO: 43epoch:train:8001-8100batch: iter_time=1.225e-04, forward_time=0.144, loss_ctc=67.225, loss_att=44.208, acc=0.725, loss=51.113, backward_time=1.027, grad_norm=118.012, 
clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.468e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 08:48:07,719 (trainer:732) INFO: 43epoch:train:8101-8200batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=64.666, loss_att=43.302, acc=0.727, loss=49.711, backward_time=1.038, grad_norm=118.883, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.468e-05, train_time=2.783 +[gpub002:0/64] 2023-07-13 08:50:32,893 (trainer:732) INFO: 43epoch:train:8201-8300batch: iter_time=1.252e-04, forward_time=0.145, loss_ctc=70.428, loss_att=52.218, acc=0.734, loss=57.681, backward_time=1.049, grad_norm=109.704, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.467e-05, train_time=2.903 +[gpub002:0/64] 2023-07-13 08:51:36,412 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 08:51:54,619 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 08:51:58,008 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 08:51:58,008 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 08:51:58,097 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 08:58:16,119 (trainer:732) INFO: 43epoch:train:8301-8400batch: iter_time=3.044, forward_time=0.205, loss_ctc=68.169, loss_att=54.539, acc=0.704, loss=58.628, backward_time=1.054, grad_norm=109.947, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=5.466e-05, train_time=9.264 +[gpub002:0/64] 2023-07-13 09:00:33,779 (trainer:732) INFO: 43epoch:train:8401-8500batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=71.006, loss_att=51.275, acc=0.715, loss=57.194, backward_time=1.029, grad_norm=111.833, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.466e-05, train_time=2.753 +[gpub002:0/64] 2023-07-13 09:02:50,313 (trainer:732) INFO: 43epoch:train:8501-8600batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=65.216, loss_att=46.304, acc=0.720, loss=51.978, backward_time=1.031, grad_norm=114.151, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.465e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 09:05:11,317 (trainer:732) INFO: 43epoch:train:8601-8700batch: iter_time=1.264e-04, forward_time=0.144, loss_ctc=75.105, loss_att=54.233, acc=0.710, loss=60.494, backward_time=1.034, grad_norm=141.900, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.464e-05, train_time=2.820 +[gpub002:0/64] 2023-07-13 09:07:26,944 (trainer:732) INFO: 43epoch:train:8701-8800batch: iter_time=1.226e-04, forward_time=0.144, loss_ctc=65.886, loss_att=50.674, acc=0.710, loss=55.238, backward_time=1.027, grad_norm=114.949, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.464e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 09:09:54,294 (trainer:732) INFO: 43epoch:train:8801-8900batch: 
iter_time=1.217e-04, forward_time=0.145, loss_ctc=72.328, loss_att=54.323, acc=0.708, loss=59.725, backward_time=1.040, grad_norm=129.091, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.463e-05, train_time=2.947 +[gpub002:0/64] 2023-07-13 09:12:09,955 (trainer:732) INFO: 43epoch:train:8901-9000batch: iter_time=1.209e-04, forward_time=0.144, loss_ctc=62.790, loss_att=40.413, acc=0.733, loss=47.126, backward_time=1.026, grad_norm=123.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.462e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 09:14:30,267 (trainer:732) INFO: 43epoch:train:9001-9100batch: iter_time=1.184e-04, forward_time=0.144, loss_ctc=65.546, loss_att=47.434, acc=0.723, loss=52.868, backward_time=1.030, grad_norm=126.202, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.462e-05, train_time=2.806 +[gpub002:0/64] 2023-07-13 09:16:03,133 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 09:16:21,262 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 09:16:24,730 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 09:16:24,730 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 09:16:24,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 09:19:55,353 (trainer:732) INFO: 43epoch:train:9101-9200batch: iter_time=1.299, forward_time=0.166, loss_ctc=68.022, loss_att=50.137, acc=0.719, loss=55.502, backward_time=1.039, grad_norm=114.933, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.461e-05, train_time=6.502 +[gpub002:0/64] 2023-07-13 09:22:18,425 (trainer:732) INFO: 43epoch:train:9201-9300batch: iter_time=0.002, forward_time=0.182, loss_ctc=67.757, loss_att=51.479, acc=0.719, loss=56.362, backward_time=1.044, grad_norm=114.798, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.184, optim0_lr0=5.460e-05, train_time=2.861 +[gpub002:0/64] 2023-07-13 09:24:35,968 (trainer:732) INFO: 43epoch:train:9301-9400batch: iter_time=1.123e-04, forward_time=0.148, loss_ctc=75.353, loss_att=56.327, acc=0.716, loss=62.035, backward_time=1.030, grad_norm=143.157, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.460e-05, train_time=2.751 +[gpub002:0/64] 2023-07-13 09:26:52,066 (trainer:732) INFO: 43epoch:train:9401-9500batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=63.167, loss_att=43.706, acc=0.722, loss=49.544, backward_time=1.028, grad_norm=124.127, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.459e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 09:29:08,787 (trainer:732) INFO: 43epoch:train:9501-9600batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=78.082, loss_att=59.968, acc=0.719, loss=65.402, backward_time=1.029, grad_norm=139.813, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, 
optim0_lr0=5.459e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 09:31:24,647 (trainer:732) INFO: 43epoch:train:9601-9700batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=62.075, loss_att=49.317, acc=0.717, loss=53.145, backward_time=1.028, grad_norm=129.904, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.458e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 09:33:40,464 (trainer:732) INFO: 43epoch:train:9701-9800batch: iter_time=1.205e-04, forward_time=0.145, loss_ctc=67.821, loss_att=46.388, acc=0.718, loss=52.818, backward_time=1.029, grad_norm=116.293, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.457e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 09:35:56,258 (trainer:732) INFO: 43epoch:train:9801-9900batch: iter_time=1.285e-04, forward_time=0.146, loss_ctc=62.427, loss_att=40.698, acc=0.739, loss=47.217, backward_time=1.028, grad_norm=116.885, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.457e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 09:38:11,996 (trainer:732) INFO: 43epoch:train:9901-10000batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=69.128, loss_att=50.402, acc=0.737, loss=56.020, backward_time=1.028, grad_norm=113.548, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.456e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 09:51:01,601 (trainer:338) INFO: 43epoch results: [train] iter_time=0.220, forward_time=0.151, loss_ctc=69.174, loss_att=50.360, acc=0.716, loss=56.005, backward_time=1.034, grad_norm=122.264, clip=100.000, loss_scale=2.614e+32, optim_step_time=0.182, optim0_lr0=5.488e-05, train_time=3.443, time=4 hours, 47 minutes and 9.28 seconds, total_count=400000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=44.035, cer_ctc=0.257, loss_att=36.329, acc=0.704, cer=0.323, wer=0.986, loss=38.641, time=6 minutes and 40.54 seconds, total_count=40986, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 57.64 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-13 09:51:20,539 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub002:0/64] 2023-07-13 09:51:20,601 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/23epoch.pth +[gpub002:0/64] 2023-07-13 09:51:20,630 (trainer:272) INFO: 44/50epoch started. Estimated time to finish: 1 day, 11 hours and 55 minutes +[gpub002:0/64] 2023-07-13 09:51:21,201 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 09:51:40,196 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 09:51:43,685 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 09:51:43,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 09:51:43,691 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 09:59:49,160 (trainer:732) INFO: 44epoch:train:1-100batch: iter_time=3.652, forward_time=0.172, loss_ctc=71.282, loss_att=54.434, acc=0.717, loss=59.488, backward_time=1.044, grad_norm=125.239, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.455e-05, train_time=10.160 +[gpub002:0/64] 2023-07-13 10:02:06,233 (trainer:732) INFO: 44epoch:train:101-200batch: iter_time=1.160e-04, forward_time=0.145, loss_ctc=74.775, loss_att=56.050, acc=0.700, loss=61.668, backward_time=1.031, grad_norm=122.349, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.455e-05, train_time=2.742 +[gpub002:0/64] 2023-07-13 10:04:22,775 (trainer:732) INFO: 44epoch:train:201-300batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=87.979, loss_att=66.421, acc=0.708, loss=72.889, backward_time=1.032, grad_norm=117.444, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.454e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 10:06:38,707 (trainer:732) INFO: 44epoch:train:301-400batch: iter_time=1.104e-04, forward_time=0.144, loss_ctc=70.492, loss_att=49.371, acc=0.696, loss=55.707, backward_time=1.027, grad_norm=120.741, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.453e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 10:08:58,696 (trainer:732) INFO: 44epoch:train:401-500batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=76.164, loss_att=60.005, acc=0.695, loss=64.852, backward_time=1.030, grad_norm=120.304, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.453e-05, train_time=2.800 +[gpub002:0/64] 2023-07-13 10:11:18,222 (trainer:732) INFO: 44epoch:train:501-600batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=86.092, loss_att=56.917, acc=0.713, loss=65.670, backward_time=1.031, grad_norm=137.885, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.452e-05, train_time=2.790 +[gpub002:0/64] 2023-07-13 10:13:52,188 (trainer:732) INFO: 44epoch:train:601-700batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=72.733, loss_att=54.095, acc=0.720, loss=59.686, backward_time=1.042, grad_norm=123.779, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.451e-05, train_time=3.079 +[gpub002:0/64] 2023-07-13 10:16:22,649 (trainer:732) INFO: 44epoch:train:701-800batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=76.359, loss_att=63.914, acc=0.695, loss=67.647, backward_time=1.055, grad_norm=120.020, clip=100.000, loss_scale=1.314e+32, optim_step_time=0.182, optim0_lr0=5.451e-05, 
train_time=3.009 +[gpub002:0/64] 2023-07-13 10:17:24,426 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 10:17:42,352 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 10:17:45,723 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 10:17:45,723 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 10:17:45,730 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 10:24:20,092 (trainer:732) INFO: 44epoch:train:801-900batch: iter_time=3.299, forward_time=0.177, loss_ctc=75.207, loss_att=54.676, acc=0.718, loss=60.835, backward_time=1.047, grad_norm=125.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.450e-05, train_time=9.548 +[gpub002:0/64] 2023-07-13 10:26:36,519 (trainer:732) INFO: 44epoch:train:901-1000batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=74.822, loss_att=57.463, acc=0.693, loss=62.670, backward_time=1.028, grad_norm=130.012, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.449e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 10:28:53,173 (trainer:732) INFO: 44epoch:train:1001-1100batch: iter_time=1.127e-04, forward_time=0.147, loss_ctc=83.374, loss_att=63.558, acc=0.698, loss=69.502, backward_time=1.030, grad_norm=144.799, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.449e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 10:31:09,555 (trainer:732) INFO: 44epoch:train:1101-1200batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=77.286, loss_att=52.646, acc=0.715, loss=60.038, backward_time=1.031, grad_norm=129.265, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.448e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 10:33:25,148 (trainer:732) INFO: 44epoch:train:1201-1300batch: iter_time=1.248e-04, forward_time=0.143, loss_ctc=71.158, loss_att=53.092, acc=0.697, loss=58.512, backward_time=1.026, grad_norm=97.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.447e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 10:35:40,885 (trainer:732) INFO: 44epoch:train:1301-1400batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=79.055, loss_att=55.984, acc=0.699, loss=62.905, backward_time=1.028, grad_norm=113.589, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.447e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 10:37:56,583 (trainer:732) INFO: 44epoch:train:1401-1500batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=76.489, loss_att=55.112, acc=0.717, loss=61.525, backward_time=1.027, grad_norm=124.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.446e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 10:40:12,676 (trainer:732) INFO: 44epoch:train:1501-1600batch: iter_time=1.402e-04, forward_time=0.145, loss_ctc=71.799, loss_att=59.313, acc=0.689, loss=63.059, 
backward_time=1.031, grad_norm=120.829, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.446e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 10:41:44,379 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 10:42:02,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 10:42:06,277 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 10:42:06,277 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 10:42:06,283 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 10:45:52,981 (trainer:732) INFO: 44epoch:train:1601-1700batch: iter_time=1.287, forward_time=0.144, loss_ctc=80.146, loss_att=60.139, acc=0.710, loss=66.141, backward_time=1.038, grad_norm=125.126, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.445e-05, train_time=6.806 +[gpub002:0/64] 2023-07-13 10:48:09,910 (trainer:732) INFO: 44epoch:train:1701-1800batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=70.303, loss_att=54.003, acc=0.711, loss=58.893, backward_time=1.033, grad_norm=114.713, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.444e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 10:50:25,669 (trainer:732) INFO: 44epoch:train:1801-1900batch: iter_time=1.468e-04, forward_time=0.146, loss_ctc=76.075, loss_att=61.114, acc=0.693, loss=65.602, backward_time=1.028, grad_norm=138.441, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.444e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 10:52:41,672 (trainer:732) INFO: 44epoch:train:1901-2000batch: iter_time=1.075e-04, forward_time=0.144, loss_ctc=83.849, loss_att=56.957, acc=0.718, loss=65.025, backward_time=1.029, grad_norm=117.446, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.443e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 10:54:57,290 (trainer:732) INFO: 44epoch:train:2001-2100batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=67.877, loss_att=48.882, acc=0.702, loss=54.581, backward_time=1.026, grad_norm=113.885, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.442e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 10:57:13,426 (trainer:732) INFO: 44epoch:train:2101-2200batch: iter_time=9.840e-05, forward_time=0.144, loss_ctc=81.071, loss_att=59.103, acc=0.698, loss=65.693, backward_time=1.029, grad_norm=131.853, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.442e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 10:59:32,038 (trainer:732) INFO: 44epoch:train:2201-2300batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=74.573, loss_att=53.223, acc=0.709, loss=59.628, backward_time=1.030, grad_norm=119.358, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.441e-05, train_time=2.772 +[gpub002:0/64] 2023-07-13 11:01:50,330 (trainer:732) INFO: 
44epoch:train:2301-2400batch: iter_time=1.018e-04, forward_time=0.145, loss_ctc=75.625, loss_att=60.504, acc=0.695, loss=65.040, backward_time=1.041, grad_norm=128.773, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.440e-05, train_time=2.766 +[gpub002:0/64] 2023-07-13 11:04:07,084 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 11:04:25,422 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:04:28,900 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:04:28,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 11:04:28,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:08:22,215 (trainer:732) INFO: 44epoch:train:2401-2500batch: iter_time=1.295, forward_time=0.144, loss_ctc=78.132, loss_att=62.868, acc=0.704, loss=67.447, backward_time=1.036, grad_norm=137.248, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.440e-05, train_time=7.837 +[gpub002:0/64] 2023-07-13 11:10:40,796 (trainer:732) INFO: 44epoch:train:2501-2600batch: iter_time=1.172e-04, forward_time=0.144, loss_ctc=67.608, loss_att=49.652, acc=0.723, loss=55.039, backward_time=1.036, grad_norm=147.316, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.439e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 11:12:56,584 (trainer:732) INFO: 44epoch:train:2601-2700batch: iter_time=1.208e-04, forward_time=0.144, loss_ctc=73.380, loss_att=58.680, acc=0.696, loss=63.090, backward_time=1.029, grad_norm=223.909, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.438e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 11:15:12,427 (trainer:732) INFO: 44epoch:train:2701-2800batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=86.240, loss_att=61.095, acc=0.709, loss=68.639, backward_time=1.027, grad_norm=160.636, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.438e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 11:17:27,987 (trainer:732) INFO: 44epoch:train:2801-2900batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=68.542, loss_att=47.662, acc=0.704, loss=53.926, backward_time=1.026, grad_norm=161.052, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.437e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 11:19:43,703 (trainer:732) INFO: 44epoch:train:2901-3000batch: iter_time=1.479e-04, forward_time=0.145, loss_ctc=74.838, loss_att=58.199, acc=0.693, loss=63.191, backward_time=1.028, grad_norm=127.448, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.437e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 11:21:59,433 (trainer:732) INFO: 44epoch:train:3001-3100batch: iter_time=1.218e-04, forward_time=0.144, loss_ctc=85.103, loss_att=56.031, acc=0.712, loss=64.753, backward_time=1.027, grad_norm=130.375, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.436e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 11:24:15,402 (trainer:732) INFO: 44epoch:train:3101-3200batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=70.848, loss_att=54.267, acc=0.711, loss=59.241, backward_time=1.029, grad_norm=107.471, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.435e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 11:26:31,345 (trainer:732) INFO: 44epoch:train:3201-3300batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=78.308, loss_att=65.274, acc=0.688, loss=69.184, backward_time=1.030, grad_norm=163.070, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.435e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 11:27:16,348 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 11:27:34,316 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:27:37,764 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:27:37,764 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 11:27:37,771 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:34:38,710 (trainer:732) INFO: 44epoch:train:3301-3400batch: iter_time=1.305, forward_time=0.144, loss_ctc=71.977, loss_att=52.228, acc=0.724, loss=58.153, backward_time=1.042, grad_norm=140.230, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.434e-05, train_time=9.747 +[gpub002:0/64] 2023-07-13 11:36:55,659 (trainer:732) INFO: 44epoch:train:3401-3500batch: iter_time=1.283e-04, forward_time=0.145, loss_ctc=73.853, loss_att=55.371, acc=0.710, loss=60.915, backward_time=1.029, grad_norm=146.633, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.433e-05, train_time=2.739 +[gpub002:0/64] 2023-07-13 11:39:16,602 (trainer:732) INFO: 44epoch:train:3501-3600batch: iter_time=1.104e-04, forward_time=0.146, loss_ctc=82.166, loss_att=61.239, acc=0.713, loss=67.517, backward_time=1.055, grad_norm=156.465, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.193, optim0_lr0=5.433e-05, train_time=2.819 +[gpub002:0/64] 2023-07-13 11:41:35,797 (trainer:732) INFO: 44epoch:train:3601-3700batch: iter_time=1.001e-04, forward_time=0.146, loss_ctc=76.586, loss_att=51.918, acc=0.716, loss=59.318, backward_time=1.044, grad_norm=124.601, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.193, optim0_lr0=5.432e-05, train_time=2.784 +[gpub002:0/64] 2023-07-13 11:43:51,423 (trainer:732) INFO: 44epoch:train:3701-3800batch: iter_time=9.900e-05, forward_time=0.145, loss_ctc=69.296, loss_att=53.491, acc=0.701, loss=58.232, backward_time=1.027, grad_norm=124.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.431e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 11:46:07,819 (trainer:732) INFO: 44epoch:train:3801-3900batch: iter_time=1.039e-04, 
forward_time=0.147, loss_ctc=78.553, loss_att=56.210, acc=0.707, loss=62.913, backward_time=1.031, grad_norm=124.973, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.431e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 11:48:28,053 (trainer:732) INFO: 44epoch:train:3901-4000batch: iter_time=1.387e-04, forward_time=0.148, loss_ctc=74.950, loss_att=55.969, acc=0.726, loss=61.664, backward_time=1.037, grad_norm=137.290, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.430e-05, train_time=2.804 +[gpub002:0/64] 2023-07-13 11:50:48,088 (trainer:732) INFO: 44epoch:train:4001-4100batch: iter_time=1.290e-04, forward_time=0.148, loss_ctc=70.087, loss_att=56.682, acc=0.705, loss=60.703, backward_time=1.035, grad_norm=149.974, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.429e-05, train_time=2.800 +[gpub002:0/64] 2023-07-13 11:52:19,107 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 11:52:37,276 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:52:40,687 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:52:40,687 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 11:52:40,693 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:59:04,406 (trainer:732) INFO: 44epoch:train:4101-4200batch: iter_time=1.307, forward_time=0.146, loss_ctc=78.980, loss_att=60.456, acc=0.713, loss=66.013, backward_time=1.053, grad_norm=139.801, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.429e-05, train_time=9.926 +[gpub002:0/64] 2023-07-13 12:01:20,686 (trainer:732) INFO: 44epoch:train:4201-4300batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=70.147, loss_att=53.310, acc=0.720, loss=58.362, backward_time=1.030, grad_norm=116.468, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.428e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 12:03:37,446 (trainer:732) INFO: 44epoch:train:4301-4400batch: iter_time=1.102e-04, forward_time=0.145, loss_ctc=76.582, loss_att=59.622, acc=0.708, loss=64.710, backward_time=1.032, grad_norm=126.272, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.428e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 12:05:53,280 (trainer:732) INFO: 44epoch:train:4401-4500batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=82.415, loss_att=56.209, acc=0.724, loss=64.071, backward_time=1.028, grad_norm=129.616, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.427e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 12:08:08,646 (trainer:732) INFO: 44epoch:train:4501-4600batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=65.252, loss_att=46.355, acc=0.705, loss=52.024, backward_time=1.024, grad_norm=116.158, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.426e-05, 
train_time=2.707 +[gpub002:0/64] 2023-07-13 12:10:24,749 (trainer:732) INFO: 44epoch:train:4601-4700batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=80.559, loss_att=58.522, acc=0.706, loss=65.133, backward_time=1.030, grad_norm=160.477, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.426e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 12:12:40,507 (trainer:732) INFO: 44epoch:train:4701-4800batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=76.103, loss_att=53.892, acc=0.722, loss=60.555, backward_time=1.027, grad_norm=118.492, clip=100.000, loss_scale=2.629e+32, optim_step_time=0.181, optim0_lr0=5.425e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 12:14:56,545 (trainer:732) INFO: 44epoch:train:4801-4900batch: iter_time=1.162e-04, forward_time=0.146, loss_ctc=76.099, loss_att=61.644, acc=0.708, loss=65.980, backward_time=1.030, grad_norm=121.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.424e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:17:11,370 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 12:17:29,722 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 12:17:33,178 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 12:17:33,178 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 12:17:33,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 12:23:24,781 (trainer:732) INFO: 44epoch:train:4901-5000batch: iter_time=1.333, forward_time=0.145, loss_ctc=75.905, loss_att=57.847, acc=0.711, loss=63.264, backward_time=1.039, grad_norm=126.648, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.424e-05, train_time=10.165 +[gpub002:0/64] 2023-07-13 12:25:45,045 (trainer:732) INFO: 44epoch:train:5001-5100batch: iter_time=1.084e-04, forward_time=0.146, loss_ctc=70.805, loss_att=55.156, acc=0.702, loss=59.850, backward_time=1.041, grad_norm=116.577, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.423e-05, train_time=2.805 +[gpub002:0/64] 2023-07-13 12:28:00,589 (trainer:732) INFO: 44epoch:train:5101-5200batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=78.881, loss_att=61.680, acc=0.692, loss=66.840, backward_time=1.027, grad_norm=123.318, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.422e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 12:30:16,542 (trainer:732) INFO: 44epoch:train:5201-5300batch: iter_time=1.268e-04, forward_time=0.147, loss_ctc=80.888, loss_att=54.612, acc=0.726, loss=62.495, backward_time=1.029, grad_norm=126.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.422e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 12:32:32,337 (trainer:732) INFO: 44epoch:train:5301-5400batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=65.520, loss_att=48.765, acc=0.704, 
loss=53.792, backward_time=1.028, grad_norm=115.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.421e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 12:34:59,810 (trainer:732) INFO: 44epoch:train:5401-5500batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=77.603, loss_att=55.499, acc=0.696, loss=62.130, backward_time=1.038, grad_norm=131.812, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.421e-05, train_time=2.949 +[gpub002:0/64] 2023-07-13 12:37:15,536 (trainer:732) INFO: 44epoch:train:5501-5600batch: iter_time=1.168e-04, forward_time=0.144, loss_ctc=73.151, loss_att=52.695, acc=0.714, loss=58.832, backward_time=1.027, grad_norm=108.368, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.420e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 12:39:31,588 (trainer:732) INFO: 44epoch:train:5601-5700batch: iter_time=1.156e-04, forward_time=0.145, loss_ctc=77.041, loss_att=62.776, acc=0.692, loss=67.056, backward_time=1.030, grad_norm=117.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.419e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:41:47,589 (trainer:732) INFO: 44epoch:train:5701-5800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=75.964, loss_att=57.575, acc=0.711, loss=63.092, backward_time=1.029, grad_norm=131.182, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.419e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 12:42:32,777 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 12:42:51,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 12:42:54,458 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 12:42:54,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 12:42:54,464 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 12:48:25,155 (trainer:732) INFO: 44epoch:train:5801-5900batch: iter_time=1.344, forward_time=0.188, loss_ctc=69.328, loss_att=52.361, acc=0.722, loss=57.451, backward_time=1.039, grad_norm=118.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.418e-05, train_time=7.951 +[gpub002:0/64] 2023-07-13 12:50:41,794 (trainer:732) INFO: 44epoch:train:5901-6000batch: iter_time=1.069e-04, forward_time=0.145, loss_ctc=71.719, loss_att=54.368, acc=0.713, loss=59.573, backward_time=1.028, grad_norm=148.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.417e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 12:52:58,663 (trainer:732) INFO: 44epoch:train:6001-6100batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=82.110, loss_att=60.893, acc=0.716, loss=67.258, backward_time=1.032, grad_norm=162.632, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.417e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 12:55:14,739 
(trainer:732) INFO: 44epoch:train:6101-6200batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=76.069, loss_att=50.970, acc=0.722, loss=58.499, backward_time=1.030, grad_norm=127.718, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.416e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:57:31,152 (trainer:732) INFO: 44epoch:train:6201-6300batch: iter_time=1.203e-04, forward_time=0.147, loss_ctc=68.497, loss_att=51.883, acc=0.704, loss=56.867, backward_time=1.031, grad_norm=116.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.415e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 12:59:47,343 (trainer:732) INFO: 44epoch:train:6301-6400batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=79.729, loss_att=55.182, acc=0.711, loss=62.546, backward_time=1.030, grad_norm=126.405, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.415e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 13:02:03,500 (trainer:732) INFO: 44epoch:train:6401-6500batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=74.743, loss_att=55.259, acc=0.729, loss=61.104, backward_time=1.030, grad_norm=129.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.414e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 13:04:21,478 (trainer:732) INFO: 44epoch:train:6501-6600batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=72.287, loss_att=56.876, acc=0.707, loss=61.499, backward_time=1.035, grad_norm=121.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.414e-05, train_time=2.759 +[gpub002:0/64] 2023-07-13 13:06:07,952 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-13 13:06:26,685 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:06:30,127 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:06:30,127 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 13:06:30,133 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:10:55,875 (trainer:732) INFO: 44epoch:train:6601-6700batch: iter_time=2.456, forward_time=0.155, loss_ctc=76.904, loss_att=57.906, acc=0.720, loss=63.606, backward_time=1.050, grad_norm=144.697, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.413e-05, train_time=7.888 +[gpub002:0/64] 2023-07-13 13:13:12,276 (trainer:732) INFO: 44epoch:train:6701-6800batch: iter_time=1.179e-04, forward_time=0.144, loss_ctc=71.936, loss_att=56.557, acc=0.703, loss=61.171, backward_time=1.027, grad_norm=116.798, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.412e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 13:15:28,808 (trainer:732) INFO: 44epoch:train:6801-6900batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=80.914, loss_att=61.465, acc=0.700, loss=67.299, backward_time=1.028, grad_norm=148.620, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.412e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 13:17:44,586 (trainer:732) INFO: 44epoch:train:6901-7000batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=77.724, loss_att=54.794, acc=0.706, loss=61.673, backward_time=1.027, grad_norm=112.450, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.411e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 13:20:00,070 (trainer:732) INFO: 44epoch:train:7001-7100batch: iter_time=1.287e-04, forward_time=0.144, loss_ctc=69.685, loss_att=51.830, acc=0.704, loss=57.186, backward_time=1.026, grad_norm=123.047, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.410e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 13:22:20,661 (trainer:732) INFO: 44epoch:train:7101-7200batch: iter_time=1.207e-04, forward_time=0.165, loss_ctc=81.406, loss_att=55.579, acc=0.704, loss=63.327, backward_time=1.031, grad_norm=171.513, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.410e-05, train_time=2.812 +[gpub002:0/64] 2023-07-13 13:24:36,675 (trainer:732) INFO: 44epoch:train:7201-7300batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=73.378, loss_att=57.476, acc=0.708, loss=62.246, backward_time=1.030, grad_norm=116.929, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.409e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 13:26:53,257 (trainer:732) INFO: 44epoch:train:7301-7400batch: iter_time=1.122e-04, forward_time=0.145, loss_ctc=72.420, loss_att=58.355, acc=0.694, loss=62.575, backward_time=1.029, grad_norm=124.267, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.408e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 13:29:10,199 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub002:0/64] 2023-07-13 13:29:28,349 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:29:31,773 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:29:31,773 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 13:29:31,780 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:32:48,933 (trainer:732) INFO: 44epoch:train:7401-7500batch: iter_time=1.337, forward_time=0.199, loss_ctc=76.054, loss_att=55.067, acc=0.719, loss=61.363, backward_time=1.035, grad_norm=122.867, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.408e-05, train_time=7.113 +[gpub002:0/64] 2023-07-13 13:35:07,231 (trainer:732) INFO: 44epoch:train:7501-7600batch: iter_time=1.428e-04, forward_time=0.147, loss_ctc=72.133, loss_att=54.014, acc=0.717, loss=59.450, backward_time=1.036, grad_norm=138.467, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.407e-05, train_time=2.766 +[gpub002:0/64] 2023-07-13 13:37:23,964 (trainer:732) INFO: 44epoch:train:7601-7700batch: iter_time=9.887e-05, forward_time=0.145, loss_ctc=78.211, loss_att=59.906, acc=0.709, loss=65.397, backward_time=1.030, grad_norm=141.983, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.407e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 13:39:40,522 (trainer:732) INFO: 44epoch:train:7701-7800batch: iter_time=1.062e-04, forward_time=0.145, loss_ctc=81.364, loss_att=54.416, acc=0.730, loss=62.501, backward_time=1.028, grad_norm=135.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.406e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 13:41:55,986 (trainer:732) INFO: 44epoch:train:7801-7900batch: iter_time=1.134e-04, forward_time=0.144, loss_ctc=64.512, loss_att=47.472, acc=0.708, loss=52.584, backward_time=1.026, grad_norm=123.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.405e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 13:44:11,989 (trainer:732) INFO: 44epoch:train:7901-8000batch: iter_time=1.154e-04, forward_time=0.145, loss_ctc=77.259, loss_att=55.929, acc=0.709, loss=62.328, backward_time=1.029, grad_norm=131.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.405e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 13:46:37,906 (trainer:732) INFO: 44epoch:train:8001-8100batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=73.351, loss_att=53.304, acc=0.719, loss=59.318, backward_time=1.062, grad_norm=109.641, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.404e-05, train_time=2.918 +[gpub002:0/64] 2023-07-13 13:48:56,865 (trainer:732) INFO: 44epoch:train:8101-8200batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=74.136, loss_att=61.403, acc=0.701, loss=65.223, backward_time=1.035, grad_norm=118.234, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, 
optim0_lr0=5.403e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 13:51:17,363 (trainer:732) INFO: 44epoch:train:8201-8300batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=76.981, loss_att=57.043, acc=0.721, loss=63.025, backward_time=1.034, grad_norm=133.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.403e-05, train_time=2.810 +[gpub002:0/64] 2023-07-13 13:52:22,069 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 13:52:40,279 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:52:43,910 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:52:43,910 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 13:52:43,916 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:59:20,241 (trainer:732) INFO: 44epoch:train:8301-8400batch: iter_time=2.263, forward_time=0.185, loss_ctc=68.303, loss_att=51.973, acc=0.725, loss=56.872, backward_time=1.048, grad_norm=114.275, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.402e-05, train_time=9.657 +[gpub002:0/64] 2023-07-13 14:01:37,614 (trainer:732) INFO: 44epoch:train:8401-8500batch: iter_time=9.752e-05, forward_time=0.144, loss_ctc=71.653, loss_att=56.730, acc=0.711, loss=61.207, backward_time=1.030, grad_norm=117.549, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.402e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 14:03:54,243 (trainer:732) INFO: 44epoch:train:8501-8600batch: iter_time=1.303e-04, forward_time=0.147, loss_ctc=84.550, loss_att=58.625, acc=0.720, loss=66.402, backward_time=1.031, grad_norm=124.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.401e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 14:06:11,819 (trainer:732) INFO: 44epoch:train:8601-8700batch: iter_time=1.454e-04, forward_time=0.146, loss_ctc=68.495, loss_att=47.376, acc=0.706, loss=53.712, backward_time=1.032, grad_norm=109.505, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.400e-05, train_time=2.751 +[gpub002:0/64] 2023-07-13 14:08:27,943 (trainer:732) INFO: 44epoch:train:8701-8800batch: iter_time=1.376e-04, forward_time=0.146, loss_ctc=74.817, loss_att=58.004, acc=0.704, loss=63.048, backward_time=1.031, grad_norm=116.827, clip=100.000, loss_scale=5.257e+32, optim_step_time=0.181, optim0_lr0=5.400e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 14:10:46,494 (trainer:732) INFO: 44epoch:train:8801-8900batch: iter_time=1.356e-04, forward_time=0.147, loss_ctc=78.590, loss_att=52.950, acc=0.730, loss=60.642, backward_time=1.032, grad_norm=116.926, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.399e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 14:13:06,393 (trainer:732) INFO: 44epoch:train:8901-9000batch: iter_time=1.178e-04, forward_time=0.148, loss_ctc=72.075, 
loss_att=54.319, acc=0.717, loss=59.646, backward_time=1.037, grad_norm=123.080, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.398e-05, train_time=2.798 +[gpub002:0/64] 2023-07-13 14:15:26,105 (trainer:732) INFO: 44epoch:train:9001-9100batch: iter_time=1.106e-04, forward_time=0.146, loss_ctc=79.231, loss_att=65.332, acc=0.701, loss=69.502, backward_time=1.047, grad_norm=112.101, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.398e-05, train_time=2.794 +[gpub002:0/64] 2023-07-13 14:17:01,910 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 14:17:20,361 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 14:17:24,093 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 14:17:24,093 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 14:17:24,100 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 14:23:35,296 (trainer:732) INFO: 44epoch:train:9101-9200batch: iter_time=1.994, forward_time=0.152, loss_ctc=64.897, loss_att=47.438, acc=0.726, loss=52.676, backward_time=1.049, grad_norm=112.693, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.397e-05, train_time=9.784 +[gpub002:0/64] 2023-07-13 14:25:52,081 (trainer:732) INFO: 44epoch:train:9201-9300batch: iter_time=1.231e-04, forward_time=0.145, loss_ctc=69.610, loss_att=54.648, acc=0.710, loss=59.136, backward_time=1.028, grad_norm=111.652, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.396e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 14:28:09,358 (trainer:732) INFO: 44epoch:train:9301-9400batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=73.827, loss_att=60.050, acc=0.696, loss=64.183, backward_time=1.027, grad_norm=114.849, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.396e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 14:30:25,352 (trainer:732) INFO: 44epoch:train:9401-9500batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=84.509, loss_att=55.869, acc=0.724, loss=64.461, backward_time=1.028, grad_norm=134.698, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.395e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 14:32:41,493 (trainer:732) INFO: 44epoch:train:9501-9600batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=65.351, loss_att=46.905, acc=0.708, loss=52.439, backward_time=1.027, grad_norm=114.409, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.395e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 14:34:57,531 (trainer:732) INFO: 44epoch:train:9601-9700batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=79.646, loss_att=58.448, acc=0.702, loss=64.808, backward_time=1.030, grad_norm=123.060, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.394e-05, train_time=2.721 +[gpub002:0/64] 
2023-07-13 14:37:13,895 (trainer:732) INFO: 44epoch:train:9701-9800batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=74.065, loss_att=51.749, acc=0.719, loss=58.444, backward_time=1.028, grad_norm=130.614, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.393e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 14:39:29,723 (trainer:732) INFO: 44epoch:train:9801-9900batch: iter_time=1.294e-04, forward_time=0.146, loss_ctc=75.390, loss_att=60.899, acc=0.695, loss=65.246, backward_time=1.028, grad_norm=124.549, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.393e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 14:41:45,627 (trainer:732) INFO: 44epoch:train:9901-10000batch: iter_time=1.350e-04, forward_time=0.147, loss_ctc=75.203, loss_att=57.881, acc=0.713, loss=63.078, backward_time=1.029, grad_norm=127.418, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.392e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 14:54:13,290 (trainer:338) INFO: 44epoch results: [train] iter_time=0.229, forward_time=0.148, loss_ctc=75.300, loss_att=56.099, acc=0.709, loss=61.860, backward_time=1.033, grad_norm=128.444, clip=100.000, loss_scale=2.826e+32, optim_step_time=0.182, optim0_lr0=5.423e-05, train_time=3.485, time=4 hours, 50 minutes and 36.87 seconds, total_count=410000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.588, cer_ctc=0.256, loss_att=36.048, acc=0.706, cer=0.321, wer=0.984, loss=38.310, time=6 minutes and 1.21 seconds, total_count=41998, gpu_max_cached_mem_GB=37.574, [att_plot] time=6 minutes and 14.4 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-13 14:54:29,899 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub002:0/64] 2023-07-13 14:54:29,920 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/37epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/39epoch.pth +[gpub002:0/64] 2023-07-13 14:54:29,921 (trainer:272) INFO: 45/50epoch started. Estimated time to finish: 1 day, 6 hours and 41 minutes +[gpub002:0/64] 2023-07-13 14:54:30,027 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 14:54:47,960 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 14:54:52,321 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 14:54:52,321 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 14:54:52,372 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:01:36,666 (trainer:732) INFO: 45epoch:train:1-100batch: iter_time=2.837, forward_time=0.166, loss_ctc=81.665, loss_att=63.090, acc=0.699, loss=68.662, backward_time=1.044, grad_norm=124.602, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=5.391e-05, train_time=8.533 +[gpub002:0/64] 2023-07-13 15:03:34,215 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 15:03:58,918 (trainer:732) INFO: 45epoch:train:101-200batch: iter_time=1.454e-04, forward_time=0.167, loss_ctc=78.164, loss_att=54.166, acc=0.720, loss=61.365, backward_time=1.033, grad_norm=145.604, clip=100.000, loss_scale=5.894e+32, optim_step_time=0.183, optim0_lr0=5.391e-05, train_time=2.845 +[gpub002:0/64] 2023-07-13 15:06:25,567 (trainer:732) INFO: 45epoch:train:201-300batch: iter_time=1.205e-04, forward_time=0.151, loss_ctc=68.261, loss_att=52.387, acc=0.704, loss=57.149, backward_time=1.044, grad_norm=106.301, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.390e-05, train_time=2.933 +[gpub002:0/64] 2023-07-13 15:08:51,283 (trainer:732) INFO: 45epoch:train:301-400batch: iter_time=1.249e-04, forward_time=0.151, loss_ctc=66.533, loss_att=51.447, acc=0.717, loss=55.973, backward_time=1.039, grad_norm=108.528, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.390e-05, train_time=2.914 +[gpub002:0/64] 2023-07-13 15:11:15,915 (trainer:732) INFO: 45epoch:train:401-500batch: iter_time=1.270e-04, forward_time=0.162, loss_ctc=75.336, loss_att=58.055, acc=0.709, loss=63.240, backward_time=1.031, grad_norm=122.397, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.389e-05, train_time=2.892 +[gpub002:0/64] 2023-07-13 15:13:39,370 (trainer:732) INFO: 45epoch:train:501-600batch: iter_time=1.190e-04, forward_time=0.175, loss_ctc=80.003, loss_att=63.160, acc=0.708, loss=68.213, backward_time=1.047, grad_norm=123.534, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.388e-05, train_time=2.868 +[gpub002:0/64] 2023-07-13 15:16:05,037 (trainer:732) INFO: 45epoch:train:601-700batch: iter_time=1.220e-04, forward_time=0.160, loss_ctc=66.333, loss_att=48.550, acc=0.711, loss=53.885, backward_time=1.041, grad_norm=123.517, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.388e-05, train_time=2.914 +[gpub002:0/64] 2023-07-13 15:18:34,274 (trainer:732) INFO: 45epoch:train:701-800batch: iter_time=1.331e-04, forward_time=0.169, loss_ctc=70.528, loss_att=52.616, acc=0.702, loss=57.989, 
backward_time=1.059, grad_norm=106.529, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.387e-05, train_time=2.985 +[gpub002:0/64] 2023-07-13 15:19:38,148 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 15:19:56,052 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 15:19:59,706 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 15:19:59,706 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 15:19:59,712 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:25:05,470 (trainer:732) INFO: 45epoch:train:801-900batch: iter_time=1.925, forward_time=0.196, loss_ctc=70.359, loss_att=50.574, acc=0.717, loss=56.510, backward_time=1.041, grad_norm=156.159, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.386e-05, train_time=7.823 +[gpub002:0/64] 2023-07-13 15:27:22,701 (trainer:732) INFO: 45epoch:train:901-1000batch: iter_time=1.363e-04, forward_time=0.147, loss_ctc=79.920, loss_att=62.286, acc=0.707, loss=67.576, backward_time=1.033, grad_norm=133.899, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.386e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 15:29:38,869 (trainer:732) INFO: 45epoch:train:1001-1100batch: iter_time=1.411e-04, forward_time=0.147, loss_ctc=73.445, loss_att=53.330, acc=0.704, loss=59.364, backward_time=1.030, grad_norm=132.822, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.385e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 15:31:56,763 (trainer:732) INFO: 45epoch:train:1101-1200batch: iter_time=4.801e-04, forward_time=0.146, loss_ctc=71.514, loss_att=56.740, acc=0.723, loss=61.172, backward_time=1.034, grad_norm=142.017, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.385e-05, train_time=2.758 +[gpub002:0/64] 2023-07-13 15:34:12,180 (trainer:732) INFO: 45epoch:train:1201-1300batch: iter_time=9.975e-05, forward_time=0.143, loss_ctc=64.208, loss_att=48.306, acc=0.716, loss=53.077, backward_time=1.025, grad_norm=147.537, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.384e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 15:36:49,716 (trainer:732) INFO: 45epoch:train:1301-1400batch: iter_time=4.440e-04, forward_time=0.316, loss_ctc=76.597, loss_att=58.015, acc=0.710, loss=63.590, backward_time=1.059, grad_norm=118.802, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.383e-05, train_time=3.150 +[gpub002:0/64] 2023-07-13 15:39:05,648 (trainer:732) INFO: 45epoch:train:1401-1500batch: iter_time=1.019e-04, forward_time=0.145, loss_ctc=75.275, loss_att=58.906, acc=0.714, loss=63.817, backward_time=1.028, grad_norm=121.195, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.383e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 15:41:21,653 (trainer:732) INFO: 
45epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.145, loss_ctc=67.840, loss_att=50.503, acc=0.699, loss=55.704, backward_time=1.028, grad_norm=110.323, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.382e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 15:43:03,648 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 15:43:21,668 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 15:43:25,353 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 15:43:25,353 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 15:43:25,360 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:47:55,266 (trainer:732) INFO: 45epoch:train:1601-1700batch: iter_time=2.508, forward_time=0.175, loss_ctc=74.468, loss_att=56.404, acc=0.712, loss=61.823, backward_time=1.040, grad_norm=106.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.381e-05, train_time=7.873 +[gpub002:0/64] 2023-07-13 15:50:12,061 (trainer:732) INFO: 45epoch:train:1701-1800batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=71.768, loss_att=53.722, acc=0.718, loss=59.136, backward_time=1.032, grad_norm=124.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.381e-05, train_time=2.736 +[gpub002:0/64] 2023-07-13 15:52:28,137 (trainer:732) INFO: 45epoch:train:1801-1900batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=73.393, loss_att=54.484, acc=0.706, loss=60.157, backward_time=1.028, grad_norm=122.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.380e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 15:54:44,035 (trainer:732) INFO: 45epoch:train:1901-2000batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=72.218, loss_att=56.883, acc=0.716, loss=61.484, backward_time=1.029, grad_norm=99.995, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.380e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 15:56:59,596 (trainer:732) INFO: 45epoch:train:2001-2100batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=64.329, loss_att=47.171, acc=0.728, loss=52.319, backward_time=1.027, grad_norm=106.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.379e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 15:59:15,764 (trainer:732) INFO: 45epoch:train:2101-2200batch: iter_time=1.161e-04, forward_time=0.147, loss_ctc=78.537, loss_att=63.446, acc=0.702, loss=67.973, backward_time=1.030, grad_norm=116.976, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.378e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 16:01:31,536 (trainer:732) INFO: 45epoch:train:2201-2300batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=72.395, loss_att=52.310, acc=0.716, loss=58.335, backward_time=1.027, grad_norm=121.900, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.378e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 16:04:07,432 (trainer:732) INFO: 45epoch:train:2301-2400batch: iter_time=4.133e-04, forward_time=0.291, loss_ctc=64.940, loss_att=49.301, acc=0.698, loss=53.992, backward_time=1.048, grad_norm=119.660, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.195, optim0_lr0=5.377e-05, train_time=3.119 +[gpub002:0/64] 2023-07-13 16:06:26,316 (trainer:732) INFO: 45epoch:train:2401-2500batch: iter_time=1.060e-04, forward_time=0.146, loss_ctc=66.773, loss_att=49.837, acc=0.725, loss=54.918, backward_time=1.033, grad_norm=113.051, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.376e-05, train_time=2.778 +[gpub002:0/64] 2023-07-13 16:06:47,569 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 16:07:05,636 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:07:09,245 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:07:09,246 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 16:07:09,252 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 16:13:45,255 (trainer:732) INFO: 45epoch:train:2501-2600batch: iter_time=2.932, forward_time=0.146, loss_ctc=75.245, loss_att=59.285, acc=0.705, loss=64.073, backward_time=1.045, grad_norm=143.838, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.376e-05, train_time=8.779 +[gpub002:0/64] 2023-07-13 16:16:02,493 (trainer:732) INFO: 45epoch:train:2601-2700batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=71.169, loss_att=50.380, acc=0.721, loss=56.617, backward_time=1.031, grad_norm=126.623, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.375e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 16:18:18,405 (trainer:732) INFO: 45epoch:train:2701-2800batch: iter_time=1.474e-04, forward_time=0.146, loss_ctc=72.864, loss_att=55.984, acc=0.701, loss=61.048, backward_time=1.030, grad_norm=110.866, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.375e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:20:34,197 (trainer:732) INFO: 45epoch:train:2801-2900batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=71.354, loss_att=54.781, acc=0.724, loss=59.753, backward_time=1.029, grad_norm=114.172, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.374e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 16:22:50,091 (trainer:732) INFO: 45epoch:train:2901-3000batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=63.704, loss_att=47.879, acc=0.721, loss=52.626, backward_time=1.031, grad_norm=113.241, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.373e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:25:08,892 (trainer:732) INFO: 45epoch:train:3001-3100batch: iter_time=1.287e-04, 
forward_time=0.146, loss_ctc=80.531, loss_att=66.219, acc=0.702, loss=70.512, backward_time=1.031, grad_norm=129.615, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.373e-05, train_time=2.776 +[gpub002:0/64] 2023-07-13 16:27:26,710 (trainer:732) INFO: 45epoch:train:3101-3200batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=74.038, loss_att=52.789, acc=0.722, loss=59.164, backward_time=1.033, grad_norm=120.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.372e-05, train_time=2.756 +[gpub002:0/64] 2023-07-13 16:29:45,474 (trainer:732) INFO: 45epoch:train:3201-3300batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=64.774, loss_att=50.047, acc=0.692, loss=54.465, backward_time=1.031, grad_norm=159.741, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.372e-05, train_time=2.775 +[gpub002:0/64] 2023-07-13 16:30:34,615 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 16:30:52,965 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:30:56,648 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:30:56,648 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 16:30:56,655 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 16:36:51,770 (trainer:732) INFO: 45epoch:train:3301-3400batch: iter_time=1.357, forward_time=0.191, loss_ctc=74.879, loss_att=59.904, acc=0.707, loss=64.396, backward_time=1.042, grad_norm=130.493, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.371e-05, train_time=8.525 +[gpub002:0/64] 2023-07-13 16:39:07,620 (trainer:732) INFO: 45epoch:train:3401-3500batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=72.274, loss_att=48.909, acc=0.725, loss=55.919, backward_time=1.029, grad_norm=119.444, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.370e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:41:23,193 (trainer:732) INFO: 45epoch:train:3501-3600batch: iter_time=1.335e-04, forward_time=0.146, loss_ctc=73.254, loss_att=56.523, acc=0.710, loss=61.542, backward_time=1.027, grad_norm=136.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.370e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 16:43:38,990 (trainer:732) INFO: 45epoch:train:3601-3700batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=68.969, loss_att=55.099, acc=0.704, loss=59.260, backward_time=1.029, grad_norm=118.124, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.369e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 16:45:54,638 (trainer:732) INFO: 45epoch:train:3701-3800batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=67.300, loss_att=51.250, acc=0.710, loss=56.065, backward_time=1.027, grad_norm=122.728, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.368e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 16:48:11,353 (trainer:732) INFO: 45epoch:train:3801-3900batch: iter_time=1.176e-04, forward_time=0.147, loss_ctc=79.048, loss_att=65.393, acc=0.693, loss=69.490, backward_time=1.029, grad_norm=138.605, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.368e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 16:50:30,597 (trainer:732) INFO: 45epoch:train:3901-4000batch: iter_time=1.266e-04, forward_time=0.166, loss_ctc=67.808, loss_att=48.262, acc=0.723, loss=54.126, backward_time=1.032, grad_norm=118.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.367e-05, train_time=2.785 +[gpub002:0/64] 2023-07-13 16:52:46,308 (trainer:732) INFO: 45epoch:train:4001-4100batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=66.270, loss_att=49.916, acc=0.699, loss=54.822, backward_time=1.029, grad_norm=112.093, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.367e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 16:54:21,064 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 16:54:39,212 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:54:42,631 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:54:42,631 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 16:54:42,637 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:00:13,505 (trainer:732) INFO: 45epoch:train:4101-4200batch: iter_time=1.377, forward_time=0.185, loss_ctc=69.143, loss_att=51.631, acc=0.718, loss=56.885, backward_time=1.038, grad_norm=123.088, clip=100.000, loss_scale=3.829e+32, optim_step_time=0.184, optim0_lr0=5.366e-05, train_time=8.944 +[gpub002:0/64] 2023-07-13 17:02:31,518 (trainer:732) INFO: 45epoch:train:4201-4300batch: iter_time=1.013e-04, forward_time=0.147, loss_ctc=78.116, loss_att=61.080, acc=0.702, loss=66.191, backward_time=1.036, grad_norm=135.639, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.365e-05, train_time=2.760 +[gpub002:0/64] 2023-07-13 17:04:47,316 (trainer:732) INFO: 45epoch:train:4301-4400batch: iter_time=1.076e-04, forward_time=0.144, loss_ctc=73.863, loss_att=48.660, acc=0.731, loss=56.221, backward_time=1.028, grad_norm=111.515, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.365e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 17:07:04,029 (trainer:732) INFO: 45epoch:train:4401-4500batch: iter_time=1.058e-04, forward_time=0.146, loss_ctc=69.350, loss_att=55.129, acc=0.715, loss=59.396, backward_time=1.031, grad_norm=110.270, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.364e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 17:09:30,922 (trainer:732) INFO: 45epoch:train:4501-4600batch: iter_time=1.035e-04, forward_time=0.146, loss_ctc=64.703, 
loss_att=49.097, acc=0.721, loss=53.779, backward_time=1.044, grad_norm=107.915, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.363e-05, train_time=2.938 +[gpub002:0/64] 2023-07-13 17:12:18,786 (trainer:732) INFO: 45epoch:train:4601-4700batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=73.234, loss_att=56.968, acc=0.715, loss=61.847, backward_time=1.061, grad_norm=124.928, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.363e-05, train_time=3.357 +[gpub002:0/64] 2023-07-13 17:14:35,194 (trainer:732) INFO: 45epoch:train:4701-4800batch: iter_time=1.380e-04, forward_time=0.148, loss_ctc=76.055, loss_att=59.898, acc=0.712, loss=64.745, backward_time=1.032, grad_norm=120.870, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.362e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 17:16:50,801 (trainer:732) INFO: 45epoch:train:4801-4900batch: iter_time=1.407e-04, forward_time=0.146, loss_ctc=62.951, loss_att=45.591, acc=0.716, loss=50.799, backward_time=1.027, grad_norm=117.396, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.362e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 17:19:06,572 (trainer:732) INFO: 45epoch:train:4901-5000batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=68.808, loss_att=51.751, acc=0.704, loss=56.868, backward_time=1.028, grad_norm=136.363, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.361e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 17:19:21,389 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 17:19:39,673 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 17:19:43,090 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 17:19:43,090 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 17:19:43,096 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:24:59,179 (trainer:732) INFO: 45epoch:train:5001-5100batch: iter_time=2.009, forward_time=0.147, loss_ctc=73.429, loss_att=58.346, acc=0.701, loss=62.871, backward_time=1.048, grad_norm=125.174, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.360e-05, train_time=7.052 +[gpub002:0/64] 2023-07-13 17:27:16,621 (trainer:732) INFO: 45epoch:train:5101-5200batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=69.877, loss_att=49.329, acc=0.732, loss=55.493, backward_time=1.030, grad_norm=121.180, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.360e-05, train_time=2.749 +[gpub002:0/64] 2023-07-13 17:29:32,165 (trainer:732) INFO: 45epoch:train:5201-5300batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=72.769, loss_att=55.528, acc=0.703, loss=60.701, backward_time=1.026, grad_norm=161.988, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.359e-05, train_time=2.711 +[gpub002:0/64] 
2023-07-13 17:31:47,590 (trainer:732) INFO: 45epoch:train:5301-5400batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=71.449, loss_att=55.740, acc=0.713, loss=60.453, backward_time=1.025, grad_norm=134.067, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.359e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 17:34:03,500 (trainer:732) INFO: 45epoch:train:5401-5500batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=63.744, loss_att=47.642, acc=0.716, loss=52.473, backward_time=1.028, grad_norm=120.630, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.358e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 17:36:19,787 (trainer:732) INFO: 45epoch:train:5501-5600batch: iter_time=1.088e-04, forward_time=0.145, loss_ctc=80.084, loss_att=68.051, acc=0.690, loss=71.661, backward_time=1.028, grad_norm=136.204, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.357e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 17:38:35,468 (trainer:732) INFO: 45epoch:train:5601-5700batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=73.364, loss_att=52.280, acc=0.718, loss=58.605, backward_time=1.026, grad_norm=133.967, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.357e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 17:40:50,747 (trainer:732) INFO: 45epoch:train:5701-5800batch: iter_time=1.299e-04, forward_time=0.145, loss_ctc=64.239, loss_att=50.021, acc=0.693, loss=54.286, backward_time=1.026, grad_norm=113.559, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.356e-05, train_time=2.705 +[gpub002:0/64] 2023-07-13 17:41:39,369 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 17:41:57,470 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 17:42:01,065 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 17:42:01,065 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 17:42:01,071 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:49:41,785 (trainer:732) INFO: 45epoch:train:5801-5900batch: iter_time=1.389, forward_time=0.199, loss_ctc=66.566, loss_att=47.542, acc=0.725, loss=53.249, backward_time=1.047, grad_norm=123.650, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.185, optim0_lr0=5.355e-05, train_time=10.620 +[gpub002:0/64] 2023-07-13 17:51:58,518 (trainer:732) INFO: 45epoch:train:5901-6000batch: iter_time=1.269e-04, forward_time=0.148, loss_ctc=76.867, loss_att=60.070, acc=0.716, loss=65.109, backward_time=1.030, grad_norm=146.171, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.355e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 17:54:14,647 (trainer:732) INFO: 45epoch:train:6001-6100batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=73.164, loss_att=52.146, acc=0.712, loss=58.451, backward_time=1.031, 
grad_norm=118.081, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.354e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 17:56:25,548 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 17:56:30,992 (trainer:732) INFO: 45epoch:train:6101-6200batch: iter_time=1.360e-04, forward_time=0.147, loss_ctc=72.297, loss_att=55.985, acc=0.727, loss=60.878, backward_time=1.033, grad_norm=127.067, clip=100.000, loss_scale=6.358e+32, optim_step_time=0.182, optim0_lr0=5.354e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 17:58:48,413 (trainer:732) INFO: 45epoch:train:6201-6300batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=62.624, loss_att=46.194, acc=0.728, loss=51.123, backward_time=1.042, grad_norm=124.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.353e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 18:01:05,345 (trainer:732) INFO: 45epoch:train:6301-6400batch: iter_time=9.910e-05, forward_time=0.146, loss_ctc=75.500, loss_att=57.283, acc=0.714, loss=62.749, backward_time=1.031, grad_norm=118.320, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.352e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 18:03:22,316 (trainer:732) INFO: 45epoch:train:6401-6500batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=70.999, loss_att=56.502, acc=0.721, loss=60.851, backward_time=1.031, grad_norm=119.958, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.352e-05, train_time=2.739 +[gpub002:0/64] 2023-07-13 18:05:38,177 (trainer:732) INFO: 45epoch:train:6501-6600batch: iter_time=1.006e-04, forward_time=0.144, loss_ctc=66.854, loss_att=49.259, acc=0.703, loss=54.538, backward_time=1.027, grad_norm=102.459, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.351e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 18:07:21,216 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-13 18:07:39,658 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 18:07:43,082 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 18:07:43,082 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-13 18:07:43,088 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 18:12:41,336 (trainer:732) INFO: 45epoch:train:6601-6700batch: iter_time=1.633, forward_time=0.145, loss_ctc=73.582, loss_att=56.482, acc=0.708, loss=61.612, backward_time=1.040, grad_norm=136.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.351e-05, train_time=8.463
+[gpub002:0/64] 2023-07-13 18:14:58,720 (trainer:732) INFO: 45epoch:train:6701-6800batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=70.722, loss_att=52.661, acc=0.720, loss=58.080, backward_time=1.033, grad_norm=127.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.350e-05, train_time=2.747
+[gpub002:0/64] 2023-07-13 18:17:17,712 (trainer:732) INFO: 45epoch:train:6801-6900batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=73.147, loss_att=53.137, acc=0.710, loss=59.140, backward_time=1.047, grad_norm=130.410, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.349e-05, train_time=2.780
+[gpub002:0/64] 2023-07-13 18:19:40,571 (trainer:732) INFO: 45epoch:train:6901-7000batch: iter_time=1.008e-04, forward_time=0.145, loss_ctc=70.596, loss_att=56.317, acc=0.715, loss=60.601, backward_time=1.032, grad_norm=138.486, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.349e-05, train_time=2.857
+[gpub002:0/64] 2023-07-13 18:22:00,623 (trainer:732) INFO: 45epoch:train:7001-7100batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=63.705, loss_att=46.602, acc=0.721, loss=51.733, backward_time=1.033, grad_norm=101.592, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.348e-05, train_time=2.801
+[gpub002:0/64] 2023-07-13 18:24:43,695 (trainer:732) INFO: 45epoch:train:7101-7200batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=78.651, loss_att=65.443, acc=0.692, loss=69.405, backward_time=1.056, grad_norm=153.875, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.348e-05, train_time=3.261
+[gpub002:0/64] 2023-07-13 18:27:00,050 (trainer:732) INFO: 45epoch:train:7201-7300batch: iter_time=1.117e-04, forward_time=0.146, loss_ctc=69.650, loss_att=51.477, acc=0.714, loss=56.929, backward_time=1.030, grad_norm=126.353, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.347e-05, train_time=2.727
+[gpub002:0/64] 2023-07-13 18:29:15,908 (trainer:732) INFO: 45epoch:train:7301-7400batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=64.906, loss_att=48.559, acc=0.703, loss=53.463, backward_time=1.030, grad_norm=131.884, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.346e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 18:31:31,376 (trainer:732) INFO: 45epoch:train:7401-7500batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=66.285, loss_att=49.804, acc=0.719, loss=54.748, backward_time=1.027, grad_norm=119.238, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.346e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 18:31:39,809 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-13 18:31:58,133 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 18:32:01,554 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 18:32:01,554 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-13 18:32:01,560 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 18:38:55,570 (trainer:732) INFO: 45epoch:train:7501-7600batch: iter_time=2.968, forward_time=0.205, loss_ctc=74.821, loss_att=58.310, acc=0.701, loss=63.263, backward_time=1.049, grad_norm=140.580, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.345e-05, train_time=8.883
+[gpub002:0/64] 2023-07-13 18:41:12,994 (trainer:732) INFO: 45epoch:train:7601-7700batch: iter_time=1.401e-04, forward_time=0.146, loss_ctc=70.499, loss_att=49.199, acc=0.733, loss=55.589, backward_time=1.030, grad_norm=115.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.344e-05, train_time=2.749
+[gpub002:0/64] 2023-07-13 18:43:29,468 (trainer:732) INFO: 45epoch:train:7701-7800batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=72.586, loss_att=55.027, acc=0.709, loss=60.295, backward_time=1.027, grad_norm=112.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.344e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 18:45:45,793 (trainer:732) INFO: 45epoch:train:7801-7900batch: iter_time=1.293e-04, forward_time=0.144, loss_ctc=72.517, loss_att=54.985, acc=0.716, loss=60.244, backward_time=1.025, grad_norm=117.890, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.343e-05, train_time=2.726
+[gpub002:0/64] 2023-07-13 18:48:24,077 (trainer:732) INFO: 45epoch:train:7901-8000batch: iter_time=1.442e-04, forward_time=0.145, loss_ctc=63.111, loss_att=46.756, acc=0.716, loss=51.662, backward_time=1.057, grad_norm=120.211, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.343e-05, train_time=3.165
+[gpub002:0/64] 2023-07-13 18:50:40,521 (trainer:732) INFO: 45epoch:train:8001-8100batch: iter_time=1.278e-04, forward_time=0.147, loss_ctc=78.732, loss_att=66.222, acc=0.695, loss=69.975, backward_time=1.031, grad_norm=170.247, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.342e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 18:52:56,982 (trainer:732) INFO: 45epoch:train:8101-8200batch: iter_time=1.082e-04, forward_time=0.146, loss_ctc=71.237, loss_att=50.828, acc=0.721, loss=56.951, backward_time=1.031, grad_norm=144.493, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.341e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 18:55:12,607 (trainer:732) INFO: 45epoch:train:8201-8300batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=63.342, loss_att=47.314, acc=0.705, loss=52.123, backward_time=1.027, grad_norm=120.140, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.341e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 18:56:02,679 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-13 18:56:21,043 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 18:56:24,475 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 18:56:24,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-13 18:56:24,481 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 19:01:54,411 (trainer:732) INFO: 45epoch:train:8301-8400batch: iter_time=1.403, forward_time=0.145, loss_ctc=67.802, loss_att=47.923, acc=0.722, loss=53.886, backward_time=1.041, grad_norm=103.039, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.340e-05, train_time=8.036
+[gpub002:0/64] 2023-07-13 19:04:12,062 (trainer:732) INFO: 45epoch:train:8401-8500batch: iter_time=1.296e-04, forward_time=0.144, loss_ctc=77.578, loss_att=59.511, acc=0.713, loss=64.931, backward_time=1.028, grad_norm=113.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.340e-05, train_time=2.753
+[gpub002:0/64] 2023-07-13 19:06:36,114 (trainer:732) INFO: 45epoch:train:8501-8600batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=72.515, loss_att=51.973, acc=0.709, loss=58.136, backward_time=1.037, grad_norm=112.074, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.339e-05, train_time=2.881
+[gpub002:0/64] 2023-07-13 19:08:58,016 (trainer:732) INFO: 45epoch:train:8601-8700batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=70.363, loss_att=55.476, acc=0.722, loss=59.942, backward_time=1.052, grad_norm=139.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.338e-05, train_time=2.838
+[gpub002:0/64] 2023-07-13 19:11:23,653 (trainer:732) INFO: 45epoch:train:8701-8800batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=61.325, loss_att=45.766, acc=0.718, loss=50.433, backward_time=1.067, grad_norm=128.516, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.338e-05, train_time=2.913
+[gpub002:0/64] 2023-07-13 19:13:45,579 (trainer:732) INFO: 45epoch:train:8801-8900batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=75.654, loss_att=57.454, acc=0.712, loss=62.914, backward_time=1.033, grad_norm=163.219, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.337e-05, train_time=2.838
+[gpub002:0/64] 2023-07-13 19:16:01,117 (trainer:732) INFO: 45epoch:train:8901-9000batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=71.877, loss_att=58.040, acc=0.706, loss=62.191, backward_time=1.026, grad_norm=150.120, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.337e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 19:18:23,985 (trainer:732) INFO: 45epoch:train:9001-9100batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=65.441, loss_att=49.344, acc=0.702, loss=54.173, backward_time=1.036, grad_norm=131.564, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.336e-05, train_time=2.857
+[gpub002:0/64] 2023-07-13 19:20:22,038 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-13 19:20:40,563 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 19:20:44,273 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 19:20:44,273 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub002:0/64] 2023-07-13 19:20:44,279 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 19:26:50,491 (trainer:732) INFO: 45epoch:train:9101-9200batch: iter_time=1.709, forward_time=0.179, loss_ctc=67.413, loss_att=51.765, acc=0.712, loss=56.459, backward_time=1.050, grad_norm=122.252, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.335e-05, train_time=10.130
+[gpub002:0/64] 2023-07-13 19:29:07,652 (trainer:732) INFO: 45epoch:train:9201-9300batch: iter_time=1.127e-04, forward_time=0.146, loss_ctc=76.983, loss_att=59.756, acc=0.700, loss=64.924, backward_time=1.032, grad_norm=139.788, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.335e-05, train_time=2.742
+[gpub002:0/64] 2023-07-13 19:31:25,100 (trainer:732) INFO: 45epoch:train:9301-9400batch: iter_time=9.813e-05, forward_time=0.145, loss_ctc=72.296, loss_att=47.126, acc=0.734, loss=54.677, backward_time=1.028, grad_norm=113.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.334e-05, train_time=2.750
+[gpub002:0/64] 2023-07-13 19:33:41,318 (trainer:732) INFO: 45epoch:train:9401-9500batch: iter_time=9.632e-05, forward_time=0.145, loss_ctc=71.884, loss_att=54.495, acc=0.719, loss=59.712, backward_time=1.027, grad_norm=117.673, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.334e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 19:35:57,136 (trainer:732) INFO: 45epoch:train:9501-9600batch: iter_time=1.092e-04, forward_time=0.145, loss_ctc=64.752, loss_att=48.917, acc=0.719, loss=53.668, backward_time=1.026, grad_norm=117.335, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.333e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 19:38:13,202 (trainer:732) INFO: 45epoch:train:9601-9700batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=71.752, loss_att=55.035, acc=0.712, loss=60.050, backward_time=1.028, grad_norm=125.109, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.332e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 19:40:28,939 (trainer:732) INFO: 45epoch:train:9701-9800batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=75.944, loss_att=60.847, acc=0.702, loss=65.376, backward_time=1.027, grad_norm=149.069, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.332e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 19:42:44,392 (trainer:732) INFO: 45epoch:train:9801-9900batch: iter_time=1.022e-04, forward_time=0.143, loss_ctc=61.928, loss_att=45.638, acc=0.717, loss=50.525, backward_time=1.025, grad_norm=106.833, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.331e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 19:44:59,782 (trainer:732) INFO: 45epoch:train:9901-10000batch: iter_time=9.628e-05, forward_time=0.144, loss_ctc=68.171, loss_att=50.374, acc=0.709, loss=55.713, backward_time=1.024, grad_norm=116.675, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.330e-05, train_time=2.708
+[gpub002:0/64] 2023-07-13 19:59:12,558 (trainer:338) INFO: 45epoch results: [train] iter_time=0.241, forward_time=0.153, loss_ctc=71.101, loss_att=53.886, acc=0.712, loss=59.051, backward_time=1.034, grad_norm=125.414, clip=100.000, loss_scale=3.957e+32, optim_step_time=0.183, optim0_lr0=5.361e-05, train_time=3.486, time=4 hours, 50 minutes and 52.55 seconds, total_count=420000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=42.917, cer_ctc=0.255, loss_att=35.722, acc=0.704, cer=0.334, wer=0.986, loss=37.881, time=7 minutes and 50.24 seconds, total_count=43010, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 59.73 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-13 19:59:28,695 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub002:0/64] 2023-07-13 19:59:28,788 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/29epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/40epoch.pth
+[gpub002:0/64] 2023-07-13 19:59:28,788 (trainer:272) INFO: 46/50epoch started. Estimated time to finish: 1 day, 1 hour and 33 minutes
+[gpub002:0/64] 2023-07-13 19:59:28,792 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-13 19:59:46,914 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 19:59:50,345 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 19:59:50,345 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub002:0/64] 2023-07-13 19:59:50,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 20:04:40,186 (trainer:732) INFO: 46epoch:train:1-100batch: iter_time=1.647, forward_time=0.188, loss_ctc=65.883, loss_att=56.587, acc=0.712, loss=59.376, backward_time=1.048, grad_norm=121.603, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.330e-05, train_time=6.227
+[gpub002:0/64] 2023-07-13 20:06:57,011 (trainer:732) INFO: 46epoch:train:101-200batch: iter_time=1.304e-04, forward_time=0.146, loss_ctc=73.119, loss_att=52.389, acc=0.708, loss=58.608, backward_time=1.033, grad_norm=126.577, clip=100.000, loss_scale=3.375e+32, optim_step_time=0.182, optim0_lr0=5.329e-05, train_time=2.737
+[gpub002:0/64] 2023-07-13 20:09:13,490 (trainer:732) INFO: 46epoch:train:201-300batch: iter_time=1.219e-04, forward_time=0.144, loss_ctc=70.101, loss_att=49.203, acc=0.726, loss=55.472, backward_time=1.027, grad_norm=136.978, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.329e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 20:11:29,153 (trainer:732) INFO: 46epoch:train:301-400batch: iter_time=1.195e-04, forward_time=0.145, loss_ctc=67.230, loss_att=54.634, acc=0.706, loss=58.413, backward_time=1.026, grad_norm=117.807, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.328e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 20:13:47,110 (trainer:732) INFO: 46epoch:train:401-500batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=68.084, loss_att=51.893, acc=0.716, loss=56.751, backward_time=1.029, grad_norm=120.025, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.327e-05, train_time=2.759
+[gpub002:0/64] 2023-07-13 20:16:12,488 (trainer:732) INFO: 46epoch:train:501-600batch: iter_time=1.310e-04, forward_time=0.144, loss_ctc=81.257, loss_att=58.875, acc=0.709, loss=65.590, backward_time=1.037, grad_norm=140.480, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.327e-05, train_time=2.907
+[gpub002:0/64] 2023-07-13 20:18:37,169 (trainer:732) INFO: 46epoch:train:601-700batch: iter_time=1.257e-04, forward_time=0.145, loss_ctc=62.945, loss_att=44.662, acc=0.715, loss=50.147, backward_time=1.036, grad_norm=124.728, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.326e-05, train_time=2.893
+[gpub002:0/64] 2023-07-13 20:21:00,820 (trainer:732) INFO: 46epoch:train:701-800batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=73.224, loss_att=50.781, acc=0.730, loss=57.514, backward_time=1.034, grad_norm=141.813, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.326e-05, train_time=2.873
+[gpub002:0/64] 2023-07-13 20:22:00,295 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-13 20:22:18,119 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 20:22:21,538 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 20:22:21,538 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-13 20:22:21,544 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 20:28:54,861 (trainer:732) INFO: 46epoch:train:801-900batch: iter_time=3.233, forward_time=0.196, loss_ctc=66.161, loss_att=51.935, acc=0.720, loss=56.203, backward_time=1.045, grad_norm=126.426, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.190, optim0_lr0=5.325e-05, train_time=9.480
+[gpub002:0/64] 2023-07-13 20:31:12,985 (trainer:732) INFO: 46epoch:train:901-1000batch: iter_time=0.001, forward_time=0.153, loss_ctc=74.152, loss_att=55.993, acc=0.706, loss=61.441, backward_time=1.032, grad_norm=120.946, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.324e-05, train_time=2.763
+[gpub002:0/64] 2023-07-13 20:33:29,468 (trainer:732) INFO: 46epoch:train:1001-1100batch: iter_time=0.001, forward_time=0.146, loss_ctc=65.898, loss_att=48.417, acc=0.733, loss=53.661, backward_time=1.032, grad_norm=117.557, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.324e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 20:36:03,141 (trainer:732) INFO: 46epoch:train:1101-1200batch: iter_time=2.935e-04, forward_time=0.273, loss_ctc=68.912, loss_att=53.279, acc=0.714, loss=57.969, backward_time=1.053, grad_norm=151.138, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.189, optim0_lr0=5.323e-05, train_time=3.072
+[gpub002:0/64] 2023-07-13 20:38:19,536 (trainer:732) INFO: 46epoch:train:1201-1300batch: iter_time=1.362e-04, forward_time=0.146, loss_ctc=70.800, loss_att=51.181, acc=0.720, loss=57.067, backward_time=1.029, grad_norm=152.229, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.323e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 20:40:36,527 (trainer:732) INFO: 46epoch:train:1301-1400batch: iter_time=1.269e-04, forward_time=0.150, loss_ctc=73.810, loss_att=54.374, acc=0.723, loss=60.205, backward_time=1.030, grad_norm=167.798, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.322e-05, train_time=2.740
+[gpub002:0/64] 2023-07-13 20:42:52,466 (trainer:732) INFO: 46epoch:train:1401-1500batch: iter_time=1.267e-04, forward_time=0.146, loss_ctc=68.079, loss_att=49.216, acc=0.708, loss=54.875, backward_time=1.026, grad_norm=209.029, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.321e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 20:45:08,125 (trainer:732) INFO: 46epoch:train:1501-1600batch: iter_time=1.277e-04, forward_time=0.144, loss_ctc=69.590, loss_att=48.587, acc=0.725, loss=54.888, backward_time=1.026, grad_norm=143.890, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.321e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 20:46:58,016 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-13 20:47:16,447 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 20:47:19,843 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 20:47:19,843 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-13 20:47:19,979 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 20:53:32,464 (trainer:732) INFO: 46epoch:train:1601-1700batch: iter_time=3.523, forward_time=0.178, loss_ctc=68.290, loss_att=51.711, acc=0.723, loss=56.685, backward_time=1.040, grad_norm=114.130, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.320e-05, train_time=10.086
+[gpub002:0/64] 2023-07-13 20:55:49,050 (trainer:732) INFO: 46epoch:train:1701-1800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=69.254, loss_att=57.546, acc=0.707, loss=61.058, backward_time=1.030, grad_norm=147.850, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.320e-05, train_time=2.732
+[gpub002:0/64] 2023-07-13 20:58:04,545 (trainer:732) INFO: 46epoch:train:1801-1900batch: iter_time=1.289e-04, forward_time=0.144, loss_ctc=67.059, loss_att=46.493, acc=0.719, loss=52.663, backward_time=1.026, grad_norm=125.550, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.319e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 21:00:20,791 (trainer:732) INFO: 46epoch:train:1901-2000batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=67.667, loss_att=50.905, acc=0.727, loss=55.934, backward_time=1.029, grad_norm=127.276, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.318e-05, train_time=2.725
+[gpub002:0/64] 2023-07-13 21:02:36,612 (trainer:732) INFO: 46epoch:train:2001-2100batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=67.126, loss_att=48.506, acc=0.720, loss=54.092, backward_time=1.028, grad_norm=118.373, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.318e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 21:04:53,111 (trainer:732) INFO: 46epoch:train:2101-2200batch: iter_time=1.325e-04, forward_time=0.148, loss_ctc=72.830, loss_att=55.843, acc=0.709, loss=60.939, backward_time=1.030, grad_norm=138.565, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.317e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 21:07:08,871 (trainer:732) INFO: 46epoch:train:2201-2300batch: iter_time=1.351e-04, forward_time=0.145, loss_ctc=69.103, loss_att=52.736, acc=0.704, loss=57.646, backward_time=1.028, grad_norm=122.386, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.317e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 21:09:24,380 (trainer:732) INFO: 46epoch:train:2301-2400batch: iter_time=1.543e-04, forward_time=0.145, loss_ctc=63.429, loss_att=44.374, acc=0.725, loss=50.090, backward_time=1.027, grad_norm=140.538, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.316e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 21:11:48,859 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-13 21:12:06,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 21:12:10,395 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 21:12:10,395 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-13 21:12:10,401 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 21:17:51,096 (trainer:732) INFO: 46epoch:train:2401-2500batch: iter_time=1.340, forward_time=0.145, loss_ctc=74.058, loss_att=50.477, acc=0.726, loss=57.551, backward_time=1.066, grad_norm=158.986, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.315e-05, train_time=10.134
+[gpub002:0/64] 2023-07-13 21:20:12,148 (trainer:732) INFO: 46epoch:train:2501-2600batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=73.951, loss_att=59.611, acc=0.702, loss=63.913, backward_time=1.038, grad_norm=135.632, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.315e-05, train_time=2.821
+[gpub002:0/64] 2023-07-13 21:22:28,950 (trainer:732) INFO: 46epoch:train:2601-2700batch: iter_time=1.170e-04, forward_time=0.143, loss_ctc=65.472, loss_att=47.308, acc=0.721, loss=52.757, backward_time=1.030, grad_norm=119.313, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.314e-05, train_time=2.736
+[gpub002:0/64] 2023-07-13 21:24:44,625 (trainer:732) INFO: 46epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=70.360, loss_att=52.584, acc=0.716, loss=57.916, backward_time=1.028, grad_norm=125.573, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.314e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 21:27:00,394 (trainer:732) INFO: 46epoch:train:2801-2900batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=66.237, loss_att=48.990, acc=0.715, loss=54.164, backward_time=1.028, grad_norm=119.418, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.313e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 21:29:16,078 (trainer:732) INFO: 46epoch:train:2901-3000batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=69.138, loss_att=52.011, acc=0.715, loss=57.149, backward_time=1.028, grad_norm=140.082, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.312e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 21:31:31,702 (trainer:732) INFO: 46epoch:train:3001-3100batch: iter_time=1.423e-04, forward_time=0.145, loss_ctc=69.682, loss_att=52.735, acc=0.704, loss=57.819, backward_time=1.028, grad_norm=112.062, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.312e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 21:33:47,412 (trainer:732) INFO: 46epoch:train:3101-3200batch: iter_time=1.092e-04, forward_time=0.146, loss_ctc=67.778, loss_att=47.656, acc=0.722, loss=53.693, backward_time=1.027, grad_norm=128.979, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.311e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 21:36:02,959 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 21:36:02,969 (trainer:732) INFO: 46epoch:train:3201-3300batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=74.814, loss_att=52.797, acc=0.718, loss=59.402, backward_time=1.028, grad_norm=125.040, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.311e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 21:36:49,099 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-13 21:37:07,122 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 21:37:10,547 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 21:37:10,547 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub002:0/64] 2023-07-13 21:37:10,663 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 21:42:10,676 (trainer:732) INFO: 46epoch:train:3301-3400batch: iter_time=1.578, forward_time=0.207, loss_ctc=67.011, loss_att=54.152, acc=0.720, loss=58.010, backward_time=1.045, grad_norm=167.072, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.310e-05, train_time=7.354
+[gpub002:0/64] 2023-07-13 21:44:26,985 (trainer:732) INFO: 46epoch:train:3401-3500batch: iter_time=1.279e-04, forward_time=0.146, loss_ctc=73.361, loss_att=54.970, acc=0.712, loss=60.487, backward_time=1.029, grad_norm=126.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.309e-05, train_time=2.726
+[gpub002:0/64] 2023-07-13 21:46:42,557 (trainer:732) INFO: 46epoch:train:3501-3600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=64.685, loss_att=47.711, acc=0.736, loss=52.804, backward_time=1.026, grad_norm=107.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.309e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 21:48:58,259 (trainer:732) INFO: 46epoch:train:3601-3700batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=68.193, loss_att=53.136, acc=0.717, loss=57.653, backward_time=1.026, grad_norm=121.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.308e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 21:51:15,506 (trainer:732) INFO: 46epoch:train:3701-3800batch: iter_time=1.059e-04, forward_time=0.147, loss_ctc=69.434, loss_att=50.735, acc=0.724, loss=56.345, backward_time=1.030, grad_norm=108.644, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.308e-05, train_time=2.744
+[gpub002:0/64] 2023-07-13 21:53:38,632 (trainer:732) INFO: 46epoch:train:3801-3900batch: iter_time=1.193e-04, forward_time=0.157, loss_ctc=71.575, loss_att=53.589, acc=0.723, loss=58.985, backward_time=1.046, grad_norm=142.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.307e-05, train_time=2.863
+[gpub002:0/64] 2023-07-13 21:55:57,708 (trainer:732) INFO: 46epoch:train:3901-4000batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=67.232, loss_att=49.694, acc=0.711, loss=54.956, backward_time=1.033, grad_norm=118.653, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.306e-05, train_time=2.781
+[gpub002:0/64] 2023-07-13 21:58:13,850 (trainer:732) INFO: 46epoch:train:4001-4100batch: iter_time=1.079e-04, forward_time=0.145, loss_ctc=69.507, loss_att=48.626, acc=0.726, loss=54.891, backward_time=1.030, grad_norm=115.628, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.306e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 22:00:07,007 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub002:0/64] 2023-07-13 22:00:25,302 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 22:00:28,740 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 22:00:28,740 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub002:0/64] 2023-07-13 22:00:28,746 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 22:06:11,914 (trainer:732) INFO: 46epoch:train:4101-4200batch: iter_time=3.280, forward_time=0.182, loss_ctc=67.564, loss_att=50.602, acc=0.727, loss=55.691, backward_time=1.048, grad_norm=124.825, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.305e-05, train_time=9.561
+[gpub002:0/64] 2023-07-13 22:08:28,453 (trainer:732) INFO: 46epoch:train:4201-4300batch: iter_time=1.204e-04, forward_time=0.144, loss_ctc=69.532, loss_att=59.594, acc=0.704, loss=62.576, backward_time=1.031, grad_norm=120.778, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.305e-05, train_time=2.731
+[gpub002:0/64] 2023-07-13 22:10:44,255 (trainer:732) INFO: 46epoch:train:4301-4400batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=67.758, loss_att=47.109, acc=0.721, loss=53.304, backward_time=1.027, grad_norm=107.527, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.304e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 22:12:59,720 (trainer:732) INFO: 46epoch:train:4401-4500batch: iter_time=1.420e-04, forward_time=0.144, loss_ctc=68.105, loss_att=51.162, acc=0.727, loss=56.245, backward_time=1.025, grad_norm=134.544, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.303e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 22:15:15,435 (trainer:732) INFO: 46epoch:train:4501-4600batch: iter_time=1.536e-04, forward_time=0.146, loss_ctc=67.447, loss_att=49.089, acc=0.720, loss=54.596, backward_time=1.028, grad_norm=123.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.303e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 22:17:31,236 (trainer:732) INFO: 46epoch:train:4601-4700batch: iter_time=1.373e-04, forward_time=0.146, loss_ctc=72.378, loss_att=55.337, acc=0.710, loss=60.450, backward_time=1.028, grad_norm=127.410, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.302e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 22:19:46,893 (trainer:732) INFO: 46epoch:train:4701-4800batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=68.304, loss_att=52.814, acc=0.705, loss=57.461, backward_time=1.027, grad_norm=115.040, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.302e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 22:22:02,247 (trainer:732) INFO: 46epoch:train:4801-4900batch: iter_time=1.396e-04, forward_time=0.145, loss_ctc=62.431, loss_att=43.727, acc=0.729, loss=49.338, backward_time=1.025, grad_norm=120.051, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.301e-05, train_time=2.707
+[gpub002:0/64] 2023-07-13 22:24:18,146 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub002:0/64] 2023-07-13 22:24:36,789 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 22:24:40,257 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 22:24:40,257 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub002:0/64] 2023-07-13 22:24:40,263 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 22:31:08,903 (trainer:732) INFO: 46epoch:train:4901-5000batch: iter_time=1.286, forward_time=0.146, loss_ctc=71.786, loss_att=49.940, acc=0.728, loss=56.494, backward_time=1.039, grad_norm=110.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.300e-05, train_time=10.932
+[gpub002:0/64] 2023-07-13 22:33:27,419 (trainer:732) INFO: 46epoch:train:5001-5100batch: iter_time=1.223e-04, forward_time=0.149, loss_ctc=64.751, loss_att=53.246, acc=0.721, loss=56.697, backward_time=1.037, grad_norm=99.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.300e-05, train_time=2.771
+[gpub002:0/64] 2023-07-13 22:35:43,951 (trainer:732) INFO: 46epoch:train:5101-5200batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=72.158, loss_att=51.378, acc=0.713, loss=57.612, backward_time=1.028, grad_norm=111.287, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.299e-05, train_time=2.730
+[gpub002:0/64] 2023-07-13 22:37:59,756 (trainer:732) INFO: 46epoch:train:5201-5300batch: iter_time=1.149e-04, forward_time=0.145, loss_ctc=67.483, loss_att=47.864, acc=0.735, loss=53.750, backward_time=1.028, grad_norm=125.499, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.299e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 22:40:23,841 (trainer:732) INFO: 46epoch:train:5301-5400batch: iter_time=1.153e-04, forward_time=0.145, loss_ctc=65.944, loss_att=51.781, acc=0.714, loss=56.030, backward_time=1.042, grad_norm=118.487, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.298e-05, train_time=2.881
+[gpub002:0/64] 2023-07-13 22:42:39,494 (trainer:732) INFO: 46epoch:train:5401-5500batch: iter_time=1.323e-04, forward_time=0.145, loss_ctc=68.524, loss_att=52.761, acc=0.713, loss=57.490, backward_time=1.028, grad_norm=118.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.297e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 22:44:55,465 (trainer:732) INFO: 46epoch:train:5501-5600batch: iter_time=1.347e-04, forward_time=0.146, loss_ctc=75.505, loss_att=56.510, acc=0.706, loss=62.208, backward_time=1.031, grad_norm=124.408, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.297e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 22:47:10,920 (trainer:732) INFO: 46epoch:train:5601-5700batch: iter_time=1.452e-04, forward_time=0.145, loss_ctc=62.459, loss_att=43.488, acc=0.724, loss=49.180, backward_time=1.026, grad_norm=107.132, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.296e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 22:49:26,868 (trainer:732) INFO: 46epoch:train:5701-5800batch: iter_time=1.384e-04, forward_time=0.146, loss_ctc=72.636, loss_att=50.457, acc=0.737, loss=57.111, backward_time=1.030, grad_norm=130.378, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.296e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 22:50:15,326 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub002:0/64] 2023-07-13 22:50:33,588 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 22:50:37,036 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 22:50:37,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-13 22:50:37,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 22:58:22,565 (trainer:732) INFO: 46epoch:train:5801-5900batch: iter_time=1.406, forward_time=0.196, loss_ctc=70.088, loss_att=56.947, acc=0.697, loss=60.889, backward_time=1.041, grad_norm=108.830, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.295e-05, train_time=10.713
+[gpub002:0/64] 2023-07-13 23:00:39,434 (trainer:732) INFO: 46epoch:train:5901-6000batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=69.077, loss_att=49.338, acc=0.718, loss=55.259, backward_time=1.029, grad_norm=127.466, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.295e-05, train_time=2.738
+[gpub002:0/64] 2023-07-13 23:02:54,972 (trainer:732) INFO: 46epoch:train:6001-6100batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=66.432, loss_att=45.825, acc=0.735, loss=52.007, backward_time=1.025, grad_norm=111.877, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.294e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 23:05:10,758 (trainer:732) INFO: 46epoch:train:6101-6200batch: iter_time=1.257e-04, forward_time=0.146, loss_ctc=68.644, loss_att=53.212, acc=0.712, loss=57.841, backward_time=1.028, grad_norm=126.634, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.293e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 23:07:27,287 (trainer:732) INFO: 46epoch:train:6201-6300batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=68.525, loss_att=54.107, acc=0.705, loss=58.432, backward_time=1.032, grad_norm=130.338, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.293e-05, train_time=2.730
+[gpub002:0/64] 2023-07-13 23:09:42,933 (trainer:732) INFO: 46epoch:train:6301-6400batch: iter_time=1.285e-04, forward_time=0.145, loss_ctc=73.030, loss_att=55.687, acc=0.706, loss=60.890, backward_time=1.028, grad_norm=122.293, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.292e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 23:11:58,473 (trainer:732) INFO: 46epoch:train:6401-6500batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=62.489, loss_att=42.039, acc=0.728, loss=48.174, backward_time=1.027, grad_norm=110.857, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.292e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 23:14:18,395 (trainer:732) INFO: 46epoch:train:6501-6600batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=70.293, loss_att=50.054, acc=0.730, loss=56.126, backward_time=1.033, grad_norm=116.507, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.291e-05, train_time=2.798
+[gpub002:0/64] 2023-07-13 23:16:08,051 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub002:0/64] 2023-07-13 23:16:26,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 23:16:29,635 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 23:16:29,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-13 23:16:29,682 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 23:21:49,471 (trainer:732) INFO: 46epoch:train:6601-6700batch: iter_time=3.060, forward_time=0.146, loss_ctc=72.928, loss_att=58.616, acc=0.707, loss=62.910, backward_time=1.044, grad_norm=131.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.290e-05, train_time=9.021
+[gpub002:0/64] 2023-07-13 23:24:06,689 (trainer:732) INFO: 46epoch:train:6701-6800batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=72.161, loss_att=56.066, acc=0.711, loss=60.894, backward_time=1.031, grad_norm=141.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.290e-05, train_time=2.744
+[gpub002:0/64] 2023-07-13 23:26:23,450 (trainer:732) INFO: 46epoch:train:6801-6900batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=66.501, loss_att=47.899, acc=0.733, loss=53.480, backward_time=1.029, grad_norm=134.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.289e-05, train_time=2.735
+[gpub002:0/64] 2023-07-13 23:28:50,171 (trainer:732) INFO: 46epoch:train:6901-7000batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=67.438, loss_att=53.891, acc=0.724, loss=57.955, backward_time=1.034, grad_norm=109.976, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.289e-05, train_time=2.934
+[gpub002:0/64] 2023-07-13 23:31:05,996 (trainer:732) INFO: 46epoch:train:7001-7100batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=66.567, loss_att=49.018, acc=0.721, loss=54.283, backward_time=1.028, grad_norm=118.826, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.288e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 23:33:21,934 (trainer:732) INFO: 46epoch:train:7101-7200batch: iter_time=1.132e-04, forward_time=0.144, loss_ctc=73.747, loss_att=54.406, acc=0.722, loss=60.208, backward_time=1.029, grad_norm=143.217, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.287e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 23:35:37,952 (trainer:732) INFO: 46epoch:train:7201-7300batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=64.890, loss_att=47.420, acc=0.716, loss=52.661, backward_time=1.031, grad_norm=122.847, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.287e-05, train_time=2.720
+[gpub002:0/64] 2023-07-13 23:37:54,841 (trainer:732) INFO: 46epoch:train:7301-7400batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=69.972, loss_att=48.798, acc=0.732, loss=55.150, backward_time=1.031, grad_norm=107.927, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.286e-05, train_time=2.738
+[gpub002:0/64] 2023-07-13 23:40:11,039 (trainer:732) INFO: 46epoch:train:7401-7500batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=72.249, loss_att=55.096, acc=0.722, loss=60.242, backward_time=1.030, grad_norm=116.626, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.286e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 23:40:12,748 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-13 23:40:30,664 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 23:40:34,096 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 23:40:34,096 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-13 23:40:34,102 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 23:46:45,686 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 23:48:38,986 (trainer:732) INFO: 46epoch:train:7501-7600batch: iter_time=1.321, forward_time=0.191, loss_ctc=65.367, loss_att=55.036, acc=0.722, loss=58.135, backward_time=1.048, grad_norm=119.689, clip=100.000, loss_scale=3.775e+32, optim_step_time=0.186, optim0_lr0=5.285e-05, train_time=10.159
+[gpub002:0/64] 2023-07-13 23:50:55,149 (trainer:732) INFO: 46epoch:train:7601-7700batch: iter_time=1.241e-04, forward_time=0.143, loss_ctc=72.022, loss_att=51.304, acc=0.717, loss=57.519, backward_time=1.028, grad_norm=133.215, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.284e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 23:53:11,294 (trainer:732) INFO: 46epoch:train:7701-7800batch: iter_time=1.063e-04, forward_time=0.144, loss_ctc=67.643, loss_att=47.760, acc=0.736, loss=53.725, backward_time=1.028, grad_norm=130.713, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.284e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 23:55:28,212 (trainer:732) INFO: 46epoch:train:7801-7900batch: iter_time=1.197e-04, forward_time=0.145, loss_ctc=64.828, loss_att=52.504, acc=0.715, loss=56.201, backward_time=1.026, grad_norm=142.216, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.283e-05, train_time=2.738
+[gpub002:0/64] 2023-07-13 23:57:43,849 (trainer:732) INFO: 46epoch:train:7901-8000batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=67.229, loss_att=51.224, acc=0.726, loss=56.026, backward_time=1.027, grad_norm=115.103, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.283e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 23:59:59,805 (trainer:732) INFO: 46epoch:train:8001-8100batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=73.639, loss_att=56.195, acc=0.717, loss=61.428, backward_time=1.028, grad_norm=125.297, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.282e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 00:02:20,477 (trainer:732) INFO: 46epoch:train:8101-8200batch: iter_time=1.146e-04, forward_time=0.166, loss_ctc=62.564, loss_att=44.204, acc=0.720, loss=49.712, backward_time=1.037, grad_norm=108.944, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.282e-05, train_time=2.813
+[gpub002:0/64] 2023-07-14 00:04:43,201 (trainer:732) INFO: 46epoch:train:8201-8300batch: iter_time=1.046e-04, forward_time=0.150, loss_ctc=71.384, loss_att=50.456, acc=0.737, loss=56.734, backward_time=1.035, grad_norm=124.346, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.281e-05, train_time=2.854
+[gpub002:0/64] 2023-07-14 00:05:53,094 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-14 00:06:11,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 00:06:14,920 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 00:06:14,920 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub002:0/64] 2023-07-14 00:06:14,927 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 00:13:47,812 (trainer:732) INFO: 46epoch:train:8301-8400batch: iter_time=3.876, forward_time=0.196, loss_ctc=69.752, loss_att=55.635, acc=0.712, loss=59.870, backward_time=1.068, grad_norm=118.005, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.280e-05, train_time=10.891
+[gpub002:0/64] 2023-07-14 00:16:05,245 (trainer:732) INFO: 46epoch:train:8401-8500batch: iter_time=1.318e-04, forward_time=0.145, loss_ctc=67.335, loss_att=48.827, acc=0.725, loss=54.379, backward_time=1.027, grad_norm=107.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.280e-05, train_time=2.749
+[gpub002:0/64] 2023-07-14 00:18:21,297 (trainer:732) INFO: 46epoch:train:8501-8600batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=67.132, loss_att=46.641, acc=0.733, loss=52.788, backward_time=1.028, grad_norm=107.093, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.279e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 00:20:42,957 (trainer:732) INFO: 46epoch:train:8601-8700batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=66.963, loss_att=52.861, acc=0.720, loss=57.092, backward_time=1.048, grad_norm=130.169, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.279e-05, train_time=2.833
+[gpub002:0/64] 2023-07-14 00:23:00,099 (trainer:732) INFO: 46epoch:train:8701-8800batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=70.472, loss_att=52.762, acc=0.723, loss=58.075, backward_time=1.032, grad_norm=139.851, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.278e-05, train_time=2.743
+[gpub002:0/64] 2023-07-14 00:25:20,177 (trainer:732) INFO: 46epoch:train:8801-8900batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=71.545, loss_att=54.284, acc=0.718, loss=59.463, backward_time=1.034, grad_norm=122.838, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.277e-05, train_time=2.801
+[gpub002:0/64] 2023-07-14 00:27:37,496 (trainer:732) INFO: 46epoch:train:8901-9000batch: iter_time=1.124e-04, forward_time=0.147, loss_ctc=62.966, loss_att=43.277, acc=0.727, loss=49.184, backward_time=1.032, grad_norm=110.704, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.277e-05, train_time=2.746
+[gpub002:0/64] 2023-07-14 00:29:53,320 (trainer:732) INFO: 46epoch:train:9001-9100batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=71.349, loss_att=50.693, acc=0.732, loss=56.890, backward_time=1.028, grad_norm=112.882, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.276e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 00:31:47,729 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-14 00:32:06,387 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 00:32:09,845 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 00:32:09,845 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-14 00:32:09,852 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 00:38:23,995 (trainer:732) INFO: 46epoch:train:9101-9200batch: iter_time=3.641, forward_time=0.187, loss_ctc=72.415, loss_att=55.674, acc=0.717, loss=60.696, backward_time=1.057, grad_norm=127.477, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.276e-05, train_time=10.213
+[gpub002:0/64] 2023-07-14 00:40:40,945 (trainer:732) INFO: 46epoch:train:9201-9300batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=67.604, loss_att=57.124, acc=0.707, loss=60.268, backward_time=1.028, grad_norm=127.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.275e-05, train_time=2.739
+[gpub002:0/64] 2023-07-14 00:42:57,963 (trainer:732) INFO: 46epoch:train:9301-9400batch: iter_time=1.030e-04, forward_time=0.143, loss_ctc=66.251, loss_att=46.442, acc=0.724, loss=52.385, backward_time=1.027, grad_norm=112.768, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.274e-05, train_time=2.740
+[gpub002:0/64] 2023-07-14 00:45:13,954 (trainer:732) INFO: 46epoch:train:9401-9500batch: iter_time=1.018e-04, forward_time=0.144, loss_ctc=67.687, loss_att=51.450, acc=0.727, loss=56.321, backward_time=1.026, grad_norm=130.442, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.274e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 00:47:30,437 (trainer:732) INFO: 46epoch:train:9501-9600batch: iter_time=1.065e-04, forward_time=0.147, loss_ctc=66.976, loss_att=48.132, acc=0.723, loss=53.785, backward_time=1.028, grad_norm=111.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.273e-05, train_time=2.731
+[gpub002:0/64] 2023-07-14 00:49:46,162 (trainer:732) INFO: 46epoch:train:9601-9700batch: iter_time=1.500e-04, forward_time=0.146, loss_ctc=73.129, loss_att=56.047, acc=0.713, loss=61.172, backward_time=1.028, grad_norm=121.197, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.273e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 00:52:01,715 (trainer:732) INFO: 46epoch:train:9701-9800batch: iter_time=1.470e-04, forward_time=0.146, loss_ctc=68.415, loss_att=54.013, acc=0.702, loss=58.333, backward_time=1.027, grad_norm=120.706, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.272e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 00:54:17,220 (trainer:732) INFO: 46epoch:train:9801-9900batch: iter_time=1.497e-04, forward_time=0.146, loss_ctc=63.092, loss_att=45.005, acc=0.726, loss=50.431, backward_time=1.027, grad_norm=115.827, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.272e-05, train_time=2.710
+[gpub002:0/64] 2023-07-14 00:56:33,163 (trainer:732) INFO: 46epoch:train:9901-10000batch: iter_time=1.520e-04, forward_time=0.147, loss_ctc=71.355, loss_att=49.992, acc=0.729, loss=56.401, backward_time=1.029, grad_norm=110.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.271e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 01:10:58,653 (trainer:338) INFO: 46epoch results: [train] iter_time=0.292, forward_time=0.151, loss_ctc=68.954, loss_att=51.397, acc=0.719, loss=56.664, backward_time=1.032, grad_norm=125.536, clip=100.000, loss_scale=4.322e+32, optim_step_time=0.183, optim0_lr0=5.300e-05, train_time=3.565, time=4 hours, 57 minutes and 19.06 seconds, total_count=430000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.169, cer_ctc=0.254, loss_att=38.108, acc=0.681, cer=0.392, wer=0.992, loss=39.627, time=8 minutes and 21.07 seconds, total_count=44022, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 49.73 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-14 01:11:14,472 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub002:0/64] 2023-07-14 01:11:14,537 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/41epoch.pth
+[gpub002:0/64] 2023-07-14 01:11:14,537 (trainer:272) INFO: 47/50epoch started. Estimated time to finish: 20 hours, 29 minutes and 24.2 seconds
+[gpub002:0/64] 2023-07-14 01:11:14,540 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-14 01:11:32,416 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 01:11:35,757 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 01:11:35,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-14 01:11:35,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 01:15:48,585 (trainer:732) INFO: 47epoch:train:1-100batch: iter_time=1.312, forward_time=0.182, loss_ctc=75.825, loss_att=61.109, acc=0.689, loss=65.524, backward_time=1.041, grad_norm=158.964, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.270e-05, train_time=5.481
+[gpub002:0/64] 2023-07-14 01:18:27,141 (trainer:732) INFO: 47epoch:train:101-200batch: iter_time=8.032e-04, forward_time=0.180, loss_ctc=64.299, loss_att=48.764, acc=0.701, loss=53.424, backward_time=1.052, grad_norm=127.674, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.270e-05, train_time=3.170
+[gpub002:0/64] 2023-07-14 01:20:57,571 (trainer:732) INFO: 47epoch:train:201-300batch: iter_time=1.242e-04, forward_time=0.156, loss_ctc=68.624, loss_att=50.174, acc=0.700, loss=55.709, backward_time=1.042, grad_norm=132.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.269e-05, train_time=3.010
+[gpub002:0/64] 2023-07-14 01:23:21,420 (trainer:732) INFO: 47epoch:train:301-400batch: iter_time=1.294e-04, forward_time=0.147, loss_ctc=73.786, loss_att=55.907, acc=0.690, loss=61.271, backward_time=1.039, grad_norm=144.858, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.269e-05, train_time=2.876
+[gpub002:0/64] 2023-07-14 01:25:56,177 (trainer:732) INFO: 47epoch:train:401-500batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=65.105, loss_att=48.139, acc=0.706, loss=53.229, backward_time=1.057, grad_norm=144.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.268e-05, train_time=3.096
+[gpub002:0/64] 2023-07-14 01:28:14,232 (trainer:732) INFO: 47epoch:train:501-600batch: iter_time=1.183e-04, forward_time=0.146, loss_ctc=71.910, loss_att=51.661, acc=0.724, loss=57.735, backward_time=1.032, grad_norm=145.246, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.267e-05, train_time=2.761
+[gpub002:0/64] 2023-07-14 01:30:34,563 (trainer:732) INFO: 47epoch:train:601-700batch: iter_time=1.124e-04, forward_time=0.143, loss_ctc=66.419, loss_att=46.645, acc=0.717, loss=52.577, backward_time=1.042, grad_norm=116.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.267e-05, train_time=2.806
+[gpub002:0/64] 2023-07-14 01:32:55,156 (trainer:732) INFO: 47epoch:train:701-800batch: iter_time=1.111e-04, forward_time=0.144, loss_ctc=75.585, loss_att=57.869, acc=0.701, loss=63.184, backward_time=1.031, grad_norm=132.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.266e-05, train_time=2.812
+[gpub002:0/64] 2023-07-14 01:33:50,705 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-14 01:34:08,334 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 01:34:11,683 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 01:34:11,683 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-14 01:34:11,703 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 01:40:02,226 (trainer:732) INFO: 47epoch:train:801-900batch: iter_time=2.580, forward_time=0.174, loss_ctc=75.755, loss_att=60.417, acc=0.703, loss=65.018, backward_time=1.045, grad_norm=133.637, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.266e-05, train_time=8.541
+[gpub002:0/64] 2023-07-14 01:42:20,381 (trainer:732) INFO: 47epoch:train:901-1000batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=69.051, loss_att=52.908, acc=0.716, loss=57.751, backward_time=1.030, grad_norm=128.359, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.265e-05, train_time=2.763
+[gpub002:0/64] 2023-07-14 01:44:36,647 (trainer:732) INFO: 47epoch:train:1001-1100batch: iter_time=1.291e-04, forward_time=0.145, loss_ctc=67.260, loss_att=48.787, acc=0.713, loss=54.329, backward_time=1.033, grad_norm=133.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.264e-05, train_time=2.725
+[gpub002:0/64] 2023-07-14 01:46:52,658 (trainer:732) INFO: 47epoch:train:1101-1200batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=69.474, loss_att=52.446, acc=0.702, loss=57.555, backward_time=1.027, grad_norm=149.543, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.264e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 01:49:08,730 (trainer:732) INFO: 47epoch:train:1201-1300batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=63.276, loss_att=48.489, acc=0.718, loss=52.925, backward_time=1.028, grad_norm=139.425, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.263e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 01:51:24,328 (trainer:732) INFO: 47epoch:train:1301-1400batch: iter_time=1.570e-04, forward_time=0.145, loss_ctc=72.456, loss_att=49.418, acc=0.727, loss=56.329, backward_time=1.028, grad_norm=130.808, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.263e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 01:53:40,003 (trainer:732) INFO: 47epoch:train:1401-1500batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=63.452, loss_att=48.009, acc=0.722, loss=52.642, backward_time=1.027, grad_norm=121.157, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.262e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 01:55:56,022 (trainer:732) INFO: 47epoch:train:1501-1600batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=78.127, loss_att=57.946, acc=0.718, loss=64.000, backward_time=1.030, grad_norm=110.143, clip=100.000, loss_scale=5.906e+32, optim_step_time=0.183, optim0_lr0=5.262e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 01:57:27,328 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-14 01:57:45,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 01:57:48,763 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 01:57:48,763 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-14 01:57:48,769 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 02:02:30,932 (trainer:732) INFO: 47epoch:train:1601-1700batch: iter_time=1.331, forward_time=0.182, loss_ctc=77.305, loss_att=58.961, acc=0.710, loss=64.464, backward_time=1.043, grad_norm=114.992, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.261e-05, train_time=7.898
+[gpub002:0/64] 2023-07-14 02:04:47,565 (trainer:732) INFO: 47epoch:train:1701-1800batch: iter_time=1.311e-04, forward_time=0.146, loss_ctc=68.654, loss_att=53.205, acc=0.712, loss=57.840, backward_time=1.031, grad_norm=151.268, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.260e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 02:07:03,513 (trainer:732) INFO: 47epoch:train:1801-1900batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=65.765, loss_att=46.697, acc=0.724, loss=52.417, backward_time=1.029, grad_norm=120.537, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.260e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 02:09:20,232 (trainer:732) INFO: 47epoch:train:1901-2000batch: iter_time=1.057e-04, forward_time=0.145, loss_ctc=67.723, loss_att=50.397, acc=0.716, loss=55.595, backward_time=1.030, grad_norm=131.389, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.259e-05, train_time=2.734
+[gpub002:0/64] 2023-07-14 02:11:36,110 (trainer:732) INFO: 47epoch:train:2001-2100batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=69.804, loss_att=52.306, acc=0.713, loss=57.555, backward_time=1.028, grad_norm=112.445, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.259e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 02:13:51,747 (trainer:732) INFO: 47epoch:train:2101-2200batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=69.056, loss_att=49.240, acc=0.712, loss=55.185, backward_time=1.025, grad_norm=113.889, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.258e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 02:16:07,448 (trainer:732) INFO: 47epoch:train:2201-2300batch: iter_time=1.067e-04, forward_time=0.144, loss_ctc=67.405, loss_att=49.335, acc=0.732, loss=54.756, backward_time=1.026, grad_norm=125.340, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.258e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 02:18:23,460 (trainer:732) INFO: 47epoch:train:2301-2400batch: iter_time=1.052e-04, forward_time=0.145, loss_ctc=71.576, loss_att=54.166, acc=0.723, loss=59.389, backward_time=1.028, grad_norm=130.633, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.257e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 02:20:48,284 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-14 02:21:06,460 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 02:21:09,867 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 02:21:09,868 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub002:0/64] 2023-07-14 02:21:09,874 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 02:27:11,938 (trainer:732) INFO: 47epoch:train:2401-2500batch: iter_time=1.320, forward_time=0.168, loss_ctc=72.393, loss_att=54.066, acc=0.713, loss=59.565, backward_time=1.066, grad_norm=115.635, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.256e-05, train_time=10.569
+[gpub002:0/64] 2023-07-14 02:29:29,559 (trainer:732) INFO: 47epoch:train:2501-2600batch: iter_time=1.496e-04, forward_time=0.146, loss_ctc=73.743, loss_att=59.618, acc=0.696, loss=63.855, backward_time=1.035, grad_norm=134.162, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.256e-05, train_time=2.752
+[gpub002:0/64] 2023-07-14 02:31:45,140 (trainer:732) INFO: 47epoch:train:2601-2700batch: iter_time=1.647e-04, forward_time=0.145, loss_ctc=63.864, loss_att=48.114, acc=0.703, loss=52.839, backward_time=1.028, grad_norm=120.583, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.255e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 02:34:00,714 (trainer:732) INFO: 47epoch:train:2701-2800batch: iter_time=1.460e-04, forward_time=0.146, loss_ctc=68.746, loss_att=49.931, acc=0.705, loss=55.575, backward_time=1.027, grad_norm=150.817, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.255e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 02:36:18,845 (trainer:732) INFO: 47epoch:train:2801-2900batch: iter_time=1.734e-04, forward_time=0.168, loss_ctc=70.481, loss_att=52.577, acc=0.699, loss=57.948, backward_time=1.029, grad_norm=134.419, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.254e-05, train_time=2.762
+[gpub002:0/64] 2023-07-14 02:38:42,459 (trainer:732) INFO: 47epoch:train:2901-3000batch: iter_time=1.060e-04, forward_time=0.144, loss_ctc=64.475, loss_att=47.169, acc=0.714, loss=52.361, backward_time=1.057, grad_norm=124.697, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.253e-05, train_time=2.872
+[gpub002:0/64] 2023-07-14 02:41:05,117 (trainer:732) INFO: 47epoch:train:3001-3100batch: iter_time=3.092e-04, forward_time=0.157, loss_ctc=72.537, loss_att=51.195, acc=0.726, loss=57.597, backward_time=1.039, grad_norm=137.411, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.253e-05, train_time=2.853
+[gpub002:0/64] 2023-07-14 02:43:46,956 (trainer:732) INFO: 47epoch:train:3101-3200batch: iter_time=0.002, forward_time=0.194, loss_ctc=64.519, loss_att=46.475, acc=0.719, loss=51.888, backward_time=1.090, grad_norm=114.179, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.200, optim0_lr0=5.252e-05, train_time=3.236
+[gpub002:0/64] 2023-07-14 02:44:01,707 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-14 02:46:11,080 (trainer:732) INFO: 47epoch:train:3201-3300batch: iter_time=1.368e-04, forward_time=0.146, loss_ctc=75.702, loss_att=56.674, acc=0.706, loss=62.383, backward_time=1.046, grad_norm=152.557, clip=100.000, loss_scale=3.510e+32, optim_step_time=0.183, optim0_lr0=5.252e-05, train_time=2.883
+[gpub002:0/64] 2023-07-14 02:47:14,725 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-14 02:47:32,956 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 02:47:36,422 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 02:47:36,422 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-14 02:47:36,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 02:54:55,647 (trainer:732) INFO: 47epoch:train:3301-3400batch: iter_time=3.444, forward_time=0.146, loss_ctc=70.929, loss_att=56.270, acc=0.703, loss=60.668, backward_time=1.052, grad_norm=114.309, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.251e-05, train_time=10.491
+[gpub002:0/64] 2023-07-14 02:57:12,121 (trainer:732) INFO: 47epoch:train:3401-3500batch: iter_time=1.342e-04, forward_time=0.145, loss_ctc=63.963, loss_att=47.556, acc=0.723, loss=52.478, backward_time=1.031, grad_norm=101.174, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.251e-05, train_time=2.729
+[gpub002:0/64] 2023-07-14 02:59:28,257 (trainer:732) INFO: 47epoch:train:3501-3600batch: iter_time=1.333e-04, forward_time=0.147, loss_ctc=66.806, loss_att=50.043, acc=0.710, loss=55.072, backward_time=1.029, grad_norm=138.787, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.250e-05, train_time=2.722
+[gpub002:0/64] 2023-07-14 03:01:48,809 (trainer:732) INFO: 47epoch:train:3601-3700batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=71.928, loss_att=54.464, acc=0.712, loss=59.703, backward_time=1.036, grad_norm=200.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.249e-05, train_time=2.811
+[gpub002:0/64] 2023-07-14 03:04:07,361 (trainer:732) INFO: 47epoch:train:3701-3800batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=67.152, loss_att=46.952, acc=0.720, loss=53.012, backward_time=1.030, grad_norm=148.586, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.249e-05, train_time=2.771
+[gpub002:0/64] 2023-07-14 03:06:28,698 (trainer:732) INFO: 47epoch:train:3801-3900batch: iter_time=1.379e-04, forward_time=0.145, loss_ctc=65.566, loss_att=47.542, acc=0.728, loss=52.949, backward_time=1.052, grad_norm=109.596, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.248e-05, train_time=2.827
+[gpub002:0/64] 2023-07-14 03:08:45,304 (trainer:732) INFO: 47epoch:train:3901-4000batch: iter_time=1.467e-04, forward_time=0.145, loss_ctc=70.262, loss_att=51.105, acc=0.728, loss=56.852, backward_time=1.027, grad_norm=130.886, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.248e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 03:11:01,331 (trainer:732) INFO: 47epoch:train:4001-4100batch: iter_time=1.377e-04, forward_time=0.146, loss_ctc=68.748, loss_att=49.916, acc=0.725, loss=55.566, backward_time=1.029, grad_norm=137.121, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.247e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 03:12:38,470 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub002:0/64] 2023-07-14 03:12:56,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 03:13:00,241 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 03:13:00,241 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-14 03:13:00,247 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 03:19:05,776 (trainer:732) INFO: 47epoch:train:4101-4200batch: iter_time=1.352, forward_time=0.208, loss_ctc=74.165, loss_att=58.149, acc=0.700, loss=62.954, backward_time=1.172, grad_norm=121.785, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.247e-05, train_time=9.688
+[gpub002:0/64] 2023-07-14 03:21:21,928 (trainer:732) INFO: 47epoch:train:4201-4300batch: iter_time=1.398e-04, forward_time=0.145, loss_ctc=65.068, loss_att=49.135, acc=0.702, loss=53.915, backward_time=1.028, grad_norm=131.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.246e-05, train_time=2.723
+[gpub002:0/64] 2023-07-14 03:23:38,012 (trainer:732) INFO: 47epoch:train:4301-4400batch: iter_time=1.409e-04, forward_time=0.144, loss_ctc=67.892, loss_att=51.883, acc=0.698, loss=56.686, backward_time=1.029, grad_norm=124.443, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.245e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 03:25:53,885 (trainer:732) INFO: 47epoch:train:4401-4500batch: iter_time=1.313e-04, forward_time=0.144, loss_ctc=68.165, loss_att=48.760, acc=0.707, loss=54.582, backward_time=1.028, grad_norm=148.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.245e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 03:28:09,352 (trainer:732) INFO: 47epoch:train:4501-4600batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=62.490, loss_att=47.304, acc=0.713, loss=51.860, backward_time=1.026, grad_norm=114.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.244e-05, train_time=2.709
+[gpub002:0/64] 2023-07-14 03:30:25,107 (trainer:732) INFO: 47epoch:train:4601-4700batch: iter_time=1.325e-04, forward_time=0.144, loss_ctc=73.055, loss_att=52.955, acc=0.722, loss=58.985, backward_time=1.027, grad_norm=135.483, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.244e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 03:32:40,456 (trainer:732) INFO: 47epoch:train:4701-4800batch: iter_time=1.270e-04, forward_time=0.143, loss_ctc=64.160, loss_att=45.152, acc=0.720, loss=50.854, backward_time=1.025, grad_norm=152.783, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.243e-05, train_time=2.707
+[gpub002:0/64] 2023-07-14 03:34:55,883 (trainer:732) INFO: 47epoch:train:4801-4900batch: iter_time=1.281e-04, forward_time=0.144, loss_ctc=75.470, loss_att=57.125, acc=0.710, loss=62.628, backward_time=1.026, grad_norm=128.779, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.242e-05, train_time=2.708
+[gpub002:0/64] 2023-07-14 03:37:11,333 (trainer:732) INFO: 47epoch:train:4901-5000batch: iter_time=1.184e-04, forward_time=0.145, loss_ctc=68.637, loss_att=53.134, acc=0.699, loss=57.785, backward_time=1.027, grad_norm=135.549, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.242e-05, train_time=2.709
+[gpub002:0/64] 2023-07-14 03:37:13,747 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub002:0/64] 2023-07-14 03:37:32,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 03:37:35,790 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 03:37:35,790 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub002:0/64] 2023-07-14 03:37:35,797 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 03:45:17,483 (trainer:732) INFO: 47epoch:train:5001-5100batch: iter_time=1.349, forward_time=0.238, loss_ctc=74.372, loss_att=58.913, acc=0.696, loss=63.550, backward_time=1.050, grad_norm=124.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.241e-05, train_time=9.723
+[gpub002:0/64] 2023-07-14 03:47:33,846 (trainer:732) INFO: 47epoch:train:5101-5200batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=63.535, loss_att=47.253, acc=0.709, loss=52.138, backward_time=1.028, grad_norm=146.127, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.241e-05, train_time=2.727
+[gpub002:0/64] 2023-07-14 03:49:56,309 (trainer:732) INFO: 47epoch:train:5201-5300batch: iter_time=1.404e-04, forward_time=0.145, loss_ctc=67.170, loss_att=48.352, acc=0.708, loss=53.997, backward_time=1.032, grad_norm=104.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.240e-05, train_time=2.849
+[gpub002:0/64] 2023-07-14 03:52:23,539 (trainer:732) INFO: 47epoch:train:5301-5400batch: iter_time=1.378e-04, forward_time=0.147, loss_ctc=70.328, loss_att=52.677, acc=0.704, loss=57.973, backward_time=1.041, grad_norm=125.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.240e-05, train_time=2.944
+[gpub002:0/64] 2023-07-14 03:54:39,620 (trainer:732) INFO: 47epoch:train:5401-5500batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=63.753, loss_att=46.245, acc=0.719, loss=51.498, backward_time=1.030, grad_norm=101.661, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.239e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 03:56:55,460 (trainer:732) INFO: 47epoch:train:5501-5600batch: iter_time=1.463e-04, forward_time=0.145, loss_ctc=70.240, loss_att=51.409, acc=0.725, loss=57.058, backward_time=1.029, grad_norm=153.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.238e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 03:59:20,845 (trainer:732) INFO: 47epoch:train:5601-5700batch: iter_time=7.024e-04, forward_time=0.186, loss_ctc=63.760, loss_att=44.689, acc=0.728, loss=50.410, backward_time=1.053, grad_norm=104.609, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.238e-05, train_time=2.907
+[gpub002:0/64] 2023-07-14 04:01:56,667 (trainer:732) INFO: 47epoch:train:5701-5800batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=75.062, loss_att=56.213, acc=0.712, loss=61.868, backward_time=1.096, grad_norm=113.891, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.237e-05, train_time=3.117
+[gpub002:0/64] 2023-07-14 04:03:00,524 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub002:0/64] 2023-07-14 04:03:18,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 04:03:22,119 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 04:03:22,120 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub002:0/64] 2023-07-14 04:03:22,126 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 04:08:19,987 (trainer:732) INFO: 47epoch:train:5801-5900batch: iter_time=2.282, forward_time=0.213, loss_ctc=75.092, loss_att=60.659, acc=0.705, loss=64.989, backward_time=1.059, grad_norm=119.456, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.237e-05, train_time=7.666
+[gpub002:0/64] 2023-07-14 04:10:36,391 (trainer:732) INFO: 47epoch:train:5901-6000batch: iter_time=1.312e-04, forward_time=0.146, loss_ctc=66.678, loss_att=50.374, acc=0.726, loss=55.266, backward_time=1.030, grad_norm=136.916, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.236e-05, train_time=2.728
+[gpub002:0/64] 2023-07-14 04:12:52,162 (trainer:732) INFO: 47epoch:train:6001-6100batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=65.131, loss_att=47.955, acc=0.721, loss=53.108, backward_time=1.026, grad_norm=129.089, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.236e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 04:15:07,834 (trainer:732) INFO: 47epoch:train:6101-6200batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=67.860, loss_att=50.576, acc=0.714, loss=55.761, backward_time=1.026, grad_norm=109.883, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.235e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 04:17:23,387 (trainer:732) INFO: 47epoch:train:6201-6300batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=61.925, loss_att=47.562, acc=0.718, loss=51.871, backward_time=1.027, grad_norm=111.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.234e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 04:19:46,670 (trainer:732) INFO: 47epoch:train:6301-6400batch: iter_time=0.005, forward_time=0.187, loss_ctc=70.970, loss_att=49.802, acc=0.728, loss=56.152, backward_time=1.043, grad_norm=134.690, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.234e-05, train_time=2.865
+[gpub002:0/64] 2023-07-14 04:22:02,533 (trainer:732) INFO: 47epoch:train:6401-6500batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=63.773, loss_att=48.033, acc=0.726, loss=52.755, backward_time=1.025, grad_norm=110.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.233e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 04:24:18,474 (trainer:732) INFO: 47epoch:train:6501-6600batch: iter_time=1.290e-04, forward_time=0.145, loss_ctc=76.393, loss_att=57.916, acc=0.722, loss=63.459, backward_time=1.028, grad_norm=126.533, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.233e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 04:25:50,082 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub002:0/64] 2023-07-14 04:26:08,645 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 04:26:12,059 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 04:26:12,060 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-14 04:26:12,066 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 04:30:38,669 (trainer:732) INFO: 47epoch:train:6601-6700batch: iter_time=1.313, forward_time=0.145, loss_ctc=74.407, loss_att=56.874, acc=0.714, loss=62.134, backward_time=1.044, grad_norm=148.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.232e-05, train_time=7.604
+[gpub002:0/64] 2023-07-14 04:32:55,250 (trainer:732) INFO: 47epoch:train:6701-6800batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=66.947, loss_att=51.659, acc=0.708, loss=56.246, backward_time=1.029, grad_norm=104.621, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.232e-05, train_time=2.731
+[gpub002:0/64] 2023-07-14 04:35:11,755 (trainer:732) INFO: 47epoch:train:6801-6900batch: iter_time=1.132e-04, forward_time=0.145, loss_ctc=64.313, loss_att=47.002, acc=0.716, loss=52.196, backward_time=1.028, grad_norm=98.831, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.231e-05, train_time=2.730
+[gpub002:0/64] 2023-07-14 04:37:28,932 (trainer:732) INFO: 47epoch:train:6901-7000batch: iter_time=3.968e-04, forward_time=0.152, loss_ctc=66.847, loss_att=51.004, acc=0.707, loss=55.757, backward_time=1.032, grad_norm=99.645, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.230e-05, train_time=2.743
+[gpub002:0/64] 2023-07-14 04:39:53,468 (trainer:732) INFO: 47epoch:train:7001-7100batch: iter_time=1.192e-04, forward_time=0.143, loss_ctc=67.687, loss_att=49.843, acc=0.716, loss=55.196, backward_time=1.040, grad_norm=119.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.230e-05, train_time=2.891
+[gpub002:0/64] 2023-07-14 04:42:25,260 (trainer:732) INFO: 47epoch:train:7101-7200batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=68.017, loss_att=47.851, acc=0.713, loss=53.900, backward_time=1.067, grad_norm=115.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.229e-05, train_time=3.036
+[gpub002:0/64] 2023-07-14 04:44:41,100 (trainer:732) INFO: 47epoch:train:7201-7300batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=66.541, loss_att=47.903, acc=0.728, loss=53.495, backward_time=1.029, grad_norm=118.808, clip=100.000, loss_scale=6.166e+32, optim_step_time=0.183, optim0_lr0=5.229e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 04:46:57,499 (trainer:732) INFO: 47epoch:train:7301-7400batch: iter_time=1.337e-04, forward_time=0.146, loss_ctc=71.183, loss_att=51.940, acc=0.725, loss=57.713, backward_time=1.029, grad_norm=115.971, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.228e-05, train_time=2.728
+[gpub002:0/64] 2023-07-14 04:49:48,888 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-14 04:50:07,256 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 04:50:10,689 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 04:50:10,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-14 04:50:10,710 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 04:54:02,831 (trainer:732) INFO: 47epoch:train:7401-7500batch: iter_time=2.585, forward_time=0.260, loss_ctc=71.936, loss_att=53.052, acc=0.710, loss=58.717, backward_time=1.061, grad_norm=137.288, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.188, optim0_lr0=5.228e-05, train_time=8.505
+[gpub002:0/64] 2023-07-14 04:56:20,415 (trainer:732) INFO: 47epoch:train:7501-7600batch: iter_time=1.183e-04, forward_time=0.146, loss_ctc=68.399, loss_att=53.056, acc=0.694, loss=57.658, backward_time=1.034, grad_norm=124.323, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.227e-05, train_time=2.753
+[gpub002:0/64] 2023-07-14 04:58:36,815 (trainer:732) INFO: 47epoch:train:7601-7700batch: iter_time=1.276e-04, forward_time=0.144, loss_ctc=67.099, loss_att=48.016, acc=0.713, loss=53.741, backward_time=1.027, grad_norm=111.682, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.226e-05, train_time=2.728
+[gpub002:0/64] 2023-07-14 05:00:53,216 (trainer:732) INFO: 47epoch:train:7701-7800batch: iter_time=1.241e-04, forward_time=0.144, loss_ctc=67.867, loss_att=52.326, acc=0.704, loss=56.988, backward_time=1.029, grad_norm=134.364, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.226e-05, train_time=2.728
+[gpub002:0/64] 2023-07-14 05:01:20,178 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-14 05:03:08,612 (trainer:732) INFO: 47epoch:train:7801-7900batch: iter_time=1.185e-04, forward_time=0.144, loss_ctc=63.118, loss_att=45.662, acc=0.720, loss=50.899, backward_time=1.027, grad_norm=101.710, clip=100.000, loss_scale=3.841e+32, optim_step_time=0.182, optim0_lr0=5.225e-05, train_time=2.708
+[gpub002:0/64] 2023-07-14 05:05:24,239 (trainer:732) INFO: 47epoch:train:7901-8000batch: iter_time=1.352e-04, forward_time=0.145, loss_ctc=70.809, loss_att=49.978, acc=0.711, loss=56.227, backward_time=1.028, grad_norm=119.098, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.225e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 05:07:39,760 (trainer:732) INFO: 47epoch:train:8001-8100batch: iter_time=1.291e-04, forward_time=0.145, loss_ctc=65.678, loss_att=48.315, acc=0.720, loss=53.524, backward_time=1.028, grad_norm=121.079, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.224e-05, train_time=2.710
+[gpub002:0/64] 2023-07-14 05:09:55,413 (trainer:732) INFO: 47epoch:train:8101-8200batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=72.102, loss_att=52.782, acc=0.728, loss=58.578, backward_time=1.027, grad_norm=124.500, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.224e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 05:12:11,383 (trainer:732) INFO: 47epoch:train:8201-8300batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=70.788, loss_att=52.774, acc=0.710, loss=58.178, backward_time=1.031, grad_norm=115.453, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.223e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 05:12:57,530 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-14 05:13:15,615 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 05:13:19,325 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 05:13:19,325 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub002:0/64] 2023-07-14 05:13:19,331 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 05:19:34,885 (trainer:732) INFO: 47epoch:train:8301-8400batch: iter_time=1.320, forward_time=0.157, loss_ctc=69.342, loss_att=56.350, acc=0.696, loss=60.247, backward_time=1.040, grad_norm=126.658, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.222e-05, train_time=8.870
+[gpub002:0/64] 2023-07-14 05:21:52,367 (trainer:732) INFO: 47epoch:train:8401-8500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.969, loss_att=47.931, acc=0.724, loss=52.742, backward_time=1.031, grad_norm=107.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.222e-05, train_time=2.749
+[gpub002:0/64] 2023-07-14 05:24:08,298 (trainer:732) INFO: 47epoch:train:8501-8600batch: iter_time=1.100e-04, forward_time=0.144, loss_ctc=65.267, loss_att=48.804, acc=0.713, loss=53.743, backward_time=1.027, grad_norm=121.119, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.221e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 05:26:24,844 (trainer:732) INFO: 47epoch:train:8601-8700batch: iter_time=1.158e-04, forward_time=0.144, loss_ctc=72.559, loss_att=55.089, acc=0.712, loss=60.330, backward_time=1.029, grad_norm=122.276, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.221e-05, train_time=2.731
+[gpub002:0/64] 2023-07-14 05:28:40,559 (trainer:732) INFO: 47epoch:train:8701-8800batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=65.539, loss_att=47.000, acc=0.719, loss=52.562, backward_time=1.027, grad_norm=122.552, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.220e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 05:30:56,463 (trainer:732) INFO: 47epoch:train:8801-8900batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=66.154, loss_att=47.927, acc=0.733, loss=53.395, backward_time=1.029, grad_norm=129.162, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.220e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 05:33:12,008 (trainer:732) INFO: 47epoch:train:8901-9000batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=71.851, loss_att=52.837, acc=0.726, loss=58.541, backward_time=1.027, grad_norm=117.162, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.219e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 05:35:27,593 (trainer:732) INFO: 47epoch:train:9001-9100batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=69.806, loss_att=51.495, acc=0.721, loss=56.989, backward_time=1.027, grad_norm=118.907, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.218e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 05:37:02,534 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-14 05:37:20,733 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 05:37:24,243 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 05:37:24,243 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub002:0/64] 2023-07-14 05:37:24,250 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 05:41:19,308 (trainer:732) INFO: 47epoch:train:9101-9200batch: iter_time=1.370, forward_time=0.154, loss_ctc=73.500, loss_att=59.056, acc=0.706, loss=63.389, backward_time=1.043, grad_norm=129.829, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.218e-05, train_time=7.034
+[gpub002:0/64] 2023-07-14 05:43:35,917 (trainer:732) INFO: 47epoch:train:9201-9300batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=66.338, loss_att=51.966, acc=0.717, loss=56.278, backward_time=1.031, grad_norm=120.789, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.217e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 05:45:52,711 (trainer:732) INFO: 47epoch:train:9301-9400batch: iter_time=1.314e-04, forward_time=0.146, loss_ctc=64.912, loss_att=46.474, acc=0.726, loss=52.005, backward_time=1.029, grad_norm=133.594, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.217e-05, train_time=2.736
+[gpub002:0/64] 2023-07-14 05:48:13,955 (trainer:732) INFO: 47epoch:train:9401-9500batch: iter_time=1.413e-04, forward_time=0.191, loss_ctc=67.262, loss_att=50.534, acc=0.717, loss=55.552, backward_time=1.033, grad_norm=131.021, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.216e-05, train_time=2.825
+[gpub002:0/64] 2023-07-14 05:50:43,138 (trainer:732) INFO: 47epoch:train:9501-9600batch: iter_time=0.001, forward_time=0.230, loss_ctc=68.713, loss_att=50.464, acc=0.725, loss=55.939, backward_time=1.041, grad_norm=113.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.200, optim0_lr0=5.216e-05, train_time=2.981
+[gpub002:0/64] 2023-07-14 05:53:04,340 (trainer:732) INFO: 47epoch:train:9601-9700batch: iter_time=1.376e-04, forward_time=0.146, loss_ctc=67.075, loss_att=47.370, acc=0.719, loss=53.282, backward_time=1.032, grad_norm=122.282, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.215e-05, train_time=2.826
+[gpub002:0/64] 2023-07-14 05:55:20,063 (trainer:732) INFO: 47epoch:train:9701-9800batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=67.510, loss_att=48.979, acc=0.734, loss=54.538, backward_time=1.028, grad_norm=124.005, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.214e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 05:57:35,588 (trainer:732) INFO: 47epoch:train:9801-9900batch: iter_time=1.115e-04, forward_time=0.143, loss_ctc=69.885, loss_att=51.819, acc=0.728, loss=57.239, backward_time=1.028, grad_norm=112.202, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.214e-05, train_time=2.710
+[gpub002:0/64] 2023-07-14 05:59:51,428 (trainer:732) INFO: 47epoch:train:9901-10000batch: iter_time=9.828e-05, forward_time=0.144, loss_ctc=71.596, loss_att=53.648, acc=0.720, loss=59.033, backward_time=1.029, grad_norm=136.522, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.213e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 06:14:05,194 (trainer:338) INFO: 47epoch results: [train] iter_time=0.216, forward_time=0.153, loss_ctc=68.897, loss_att=51.327, acc=0.714, loss=56.598, backward_time=1.037, grad_norm=126.595, clip=100.000, loss_scale=3.991e+32, optim_step_time=0.183, optim0_lr0=5.242e-05, train_time=3.463, time=4 hours, 48 minutes and 59.64 seconds, total_count=440000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.333, cer_ctc=0.254, loss_att=36.942, acc=0.674, cer=0.429, wer=0.998, loss=38.859, time=7 minutes and 55.45 seconds, total_count=45034, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 55.55 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-14 06:14:21,862 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub002:0/64] 2023-07-14 06:14:21,871 (trainer:272) INFO: 48/50epoch started. Estimated time to finish: 15 hours, 20 minutes and 28.01 seconds
+[gpub002:0/64] 2023-07-14 06:14:22,214 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-14 06:14:41,401 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 06:14:44,825 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 06:14:44,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-14 06:14:44,832 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 06:25:13,027 (trainer:732) INFO: 48epoch:train:1-100batch: iter_time=5.066, forward_time=0.185, loss_ctc=72.566, loss_att=50.721, acc=0.708, loss=57.275, backward_time=1.043, grad_norm=138.002, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.198, optim0_lr0=5.213e-05, train_time=13.017
+[gpub002:0/64] 2023-07-14 06:27:29,352 (trainer:732) INFO: 48epoch:train:101-200batch: iter_time=1.335e-04, forward_time=0.145, loss_ctc=77.852, loss_att=63.076, acc=0.696, loss=67.509, backward_time=1.031, grad_norm=128.219, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.212e-05, train_time=2.726
+[gpub002:0/64] 2023-07-14 06:29:52,948 (trainer:732) INFO: 48epoch:train:201-300batch: iter_time=1.227e-04, forward_time=0.144, loss_ctc=74.900, loss_att=50.312, acc=0.726, loss=57.688, backward_time=1.027, grad_norm=135.033, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.212e-05, train_time=2.872
+[gpub002:0/64] 2023-07-14 06:32:08,576 (trainer:732) INFO: 48epoch:train:301-400batch: iter_time=1.240e-04, forward_time=0.144, loss_ctc=76.914, loss_att=54.854, acc=0.698, loss=61.472, backward_time=1.026, grad_norm=140.589, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.211e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 06:34:24,747 (trainer:732) INFO: 48epoch:train:401-500batch: iter_time=1.252e-04, forward_time=0.143, loss_ctc=64.489, loss_att=50.286, acc=0.704, loss=54.546, backward_time=1.025, grad_norm=120.535, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.210e-05, train_time=2.723
+[gpub002:0/64] 2023-07-14 06:36:44,732 (trainer:732) INFO: 48epoch:train:501-600batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=72.098, loss_att=53.893, acc=0.695, loss=59.355, backward_time=1.026, grad_norm=121.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.210e-05, train_time=2.799
+[gpub002:0/64] 2023-07-14 06:39:10,733 (trainer:732) INFO: 48epoch:train:601-700batch: iter_time=0.004, forward_time=0.187, loss_ctc=68.179, loss_att=50.186, acc=0.717, loss=55.584, backward_time=1.038, grad_norm=119.134, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.209e-05, train_time=2.919
+[gpub002:0/64] 2023-07-14 06:41:44,082 (trainer:732) INFO: 48epoch:train:701-800batch: iter_time=1.232e-04, forward_time=0.239, loss_ctc=72.896, loss_att=50.389, acc=0.701, loss=57.141, backward_time=1.045, grad_norm=117.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.209e-05, train_time=3.068
+[gpub002:0/64] 2023-07-14 06:42:40,091 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-14 06:42:57,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 06:43:01,178 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 06:43:01,178 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-14 06:43:01,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 06:47:36,689 (trainer:732) INFO: 48epoch:train:801-900batch: iter_time=1.777, forward_time=0.193, loss_ctc=75.968, loss_att=53.040, acc=0.714, loss=59.919, backward_time=1.043, grad_norm=123.686, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.208e-05, train_time=7.051
+[gpub002:0/64] 2023-07-14 06:50:07,555 (trainer:732) INFO: 48epoch:train:901-1000batch: iter_time=1.021e-04, forward_time=0.146, loss_ctc=80.428, loss_att=59.359, acc=0.718, loss=65.679, backward_time=1.046, grad_norm=134.626, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.208e-05, train_time=3.018
+[gpub002:0/64] 2023-07-14 06:52:23,798 (trainer:732) INFO: 48epoch:train:1001-1100batch: iter_time=1.193e-04, forward_time=0.144, loss_ctc=74.892, loss_att=50.678, acc=0.724, loss=57.942, backward_time=1.031, grad_norm=128.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.207e-05, train_time=2.725
+[gpub002:0/64] 2023-07-14 06:54:40,151 (trainer:732) INFO: 48epoch:train:1101-1200batch: iter_time=1.110e-04, forward_time=0.143, loss_ctc=74.451, loss_att=54.112, acc=0.722, loss=60.214, backward_time=1.030, grad_norm=147.532, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.207e-05, train_time=2.727
+[gpub002:0/64] 2023-07-14 06:56:57,546 (trainer:732) INFO: 48epoch:train:1201-1300batch: iter_time=1.023e-04, forward_time=0.144, loss_ctc=63.785, loss_att=48.558, acc=0.713, loss=53.126, backward_time=1.031, grad_norm=111.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.206e-05, train_time=2.748
+[gpub002:0/64] 2023-07-14 06:59:13,500 (trainer:732) INFO: 48epoch:train:1301-1400batch: iter_time=1.072e-04, forward_time=0.144, loss_ctc=72.696, loss_att=53.103, acc=0.718, loss=58.981, backward_time=1.030, grad_norm=130.057, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.205e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 07:01:29,121 (trainer:732) INFO: 48epoch:train:1401-1500batch: iter_time=1.109e-04, forward_time=0.143, loss_ctc=68.627, loss_att=48.296, acc=0.723, loss=54.395, backward_time=1.027, grad_norm=116.485, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.205e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 07:03:53,791 (trainer:732) INFO: 48epoch:train:1501-1600batch: iter_time=8.219e-04, forward_time=0.204, loss_ctc=67.797, loss_att=49.570, acc=0.714, loss=55.038, backward_time=1.040, grad_norm=129.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.196, optim0_lr0=5.204e-05, train_time=2.893
+[gpub002:0/64] 2023-07-14 07:05:50,238 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-14 07:06:08,419 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 07:06:11,812 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 07:06:11,812 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-14 07:06:11,818 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 07:11:36,453 (trainer:732) INFO: 48epoch:train:1601-1700batch: iter_time=3.165, forward_time=0.201, loss_ctc=77.679, loss_att=56.373, acc=0.700, loss=62.765, backward_time=1.041, grad_norm=134.671, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.204e-05, train_time=9.252
+[gpub002:0/64] 2023-07-14 07:13:53,755 (trainer:732) INFO: 48epoch:train:1701-1800batch: iter_time=1.167e-04, forward_time=0.147, loss_ctc=67.651, loss_att=49.981, acc=0.716, loss=55.282, backward_time=1.033, grad_norm=139.985, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.203e-05, train_time=2.747
+[gpub002:0/64] 2023-07-14 07:16:10,215 (trainer:732) INFO: 48epoch:train:1801-1900batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=81.909, loss_att=60.376, acc=0.710, loss=66.836, backward_time=1.031, grad_norm=150.019, clip=100.000, loss_scale=5.841e+32, optim_step_time=0.182, optim0_lr0=5.203e-05, train_time=2.729
+[gpub002:0/64] 2023-07-14 07:18:26,045 (trainer:732) INFO: 48epoch:train:1901-2000batch: iter_time=9.725e-05, forward_time=0.145, loss_ctc=72.694, loss_att=50.879, acc=0.730, loss=57.424, backward_time=1.029, grad_norm=100.349, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.202e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 07:20:41,800 (trainer:732) INFO: 48epoch:train:2001-2100batch: iter_time=9.471e-05, forward_time=0.144, loss_ctc=75.595, loss_att=54.872, acc=0.713, loss=61.089, backward_time=1.028, grad_norm=135.140, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.201e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 07:22:57,479 (trainer:732) INFO: 48epoch:train:2101-2200batch: iter_time=8.940e-05, forward_time=0.144, loss_ctc=63.218, loss_att=46.176, acc=0.721, loss=51.288, backward_time=1.029, grad_norm=166.606, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.201e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 07:25:13,488 (trainer:732) INFO: 48epoch:train:2201-2300batch: iter_time=9.836e-05, forward_time=0.146, loss_ctc=70.710, loss_att=53.282, acc=0.717, loss=58.510, backward_time=1.031, grad_norm=133.467, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.200e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 07:27:29,318 (trainer:732) INFO: 48epoch:train:2301-2400batch: iter_time=9.628e-05, forward_time=0.144, loss_ctc=69.445, loss_att=49.286, acc=0.727, loss=55.334, backward_time=1.029, grad_norm=133.909, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.200e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 07:28:58,595 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-14 07:29:44,718 (trainer:732) INFO: 48epoch:train:2401-2500batch: iter_time=9.391e-05, forward_time=0.144, loss_ctc=69.277, loss_att=52.038, acc=0.706, loss=57.209, backward_time=1.028, grad_norm=129.891, clip=100.000, loss_scale=5.364e+32, optim_step_time=0.182, optim0_lr0=5.199e-05, train_time=2.708
+[gpub002:0/64] 2023-07-14 07:30:01,213 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-14 07:30:19,332 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:30:22,768 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:30:22,768 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-14 07:30:22,775 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:37:23,008 (trainer:732) INFO: 48epoch:train:2501-2600batch: iter_time=3.157, forward_time=0.175, loss_ctc=64.931, loss_att=49.988, acc=0.701, loss=54.471, backward_time=1.040, grad_norm=125.113, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.199e-05, train_time=9.166 +[gpub002:0/64] 2023-07-14 07:39:39,101 (trainer:732) INFO: 48epoch:train:2601-2700batch: iter_time=9.689e-05, forward_time=0.144, loss_ctc=77.176, loss_att=57.787, acc=0.708, loss=63.604, backward_time=1.027, grad_norm=129.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.198e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 07:41:55,336 (trainer:732) INFO: 48epoch:train:2701-2800batch: iter_time=1.280e-04, forward_time=0.147, loss_ctc=78.705, loss_att=57.307, acc=0.716, loss=63.727, backward_time=1.031, grad_norm=161.522, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.198e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 07:44:11,382 (trainer:732) INFO: 48epoch:train:2801-2900batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=71.666, loss_att=51.698, acc=0.722, loss=57.689, backward_time=1.029, grad_norm=143.465, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.197e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 07:46:27,152 (trainer:732) INFO: 48epoch:train:2901-3000batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=67.826, loss_att=52.816, acc=0.696, loss=57.319, backward_time=1.028, grad_norm=112.690, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.196e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 07:48:42,648 (trainer:732) INFO: 48epoch:train:3001-3100batch: iter_time=1.373e-04, forward_time=0.146, loss_ctc=62.490, loss_att=46.664, acc=0.714, loss=51.412, backward_time=1.026, grad_norm=110.321, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.196e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 07:50:58,216 (trainer:732) INFO: 48epoch:train:3101-3200batch: iter_time=1.433e-04, forward_time=0.147, loss_ctc=71.138, loss_att=53.409, acc=0.703, loss=58.728, backward_time=1.027, grad_norm=124.701, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.195e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 07:53:13,640 (trainer:732) INFO: 48epoch:train:3201-3300batch: iter_time=1.522e-04, forward_time=0.145, loss_ctc=70.454, loss_att=51.309, acc=0.718, loss=57.052, backward_time=1.025, grad_norm=137.045, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.195e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 07:54:01,692 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-14 07:54:19,955 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:54:23,377 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:54:23,377 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-14 07:54:23,383 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:59:50,081 (trainer:732) INFO: 48epoch:train:3301-3400batch: iter_time=1.291, forward_time=0.146, loss_ctc=67.251, loss_att=52.714, acc=0.689, loss=57.075, backward_time=1.040, grad_norm=162.610, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.194e-05, train_time=7.929 +[gpub002:0/64] 2023-07-14 08:02:06,659 (trainer:732) INFO: 48epoch:train:3401-3500batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=74.470, loss_att=50.103, acc=0.717, loss=57.413, backward_time=1.030, grad_norm=146.633, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.194e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 08:04:22,707 (trainer:732) INFO: 48epoch:train:3501-3600batch: iter_time=1.132e-04, forward_time=0.144, loss_ctc=78.766, loss_att=59.983, acc=0.709, loss=65.618, backward_time=1.031, grad_norm=139.024, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.193e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 08:06:38,451 (trainer:732) INFO: 48epoch:train:3601-3700batch: iter_time=9.308e-05, forward_time=0.144, loss_ctc=69.199, loss_att=49.395, acc=0.729, loss=55.336, backward_time=1.029, grad_norm=127.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.192e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 08:08:54,104 (trainer:732) INFO: 48epoch:train:3701-3800batch: iter_time=1.209e-04, forward_time=0.144, loss_ctc=71.796, loss_att=51.842, acc=0.698, loss=57.828, backward_time=1.028, grad_norm=149.795, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.192e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 08:09:50,838 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
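Note on the nan warnings: "The grad norm is nan. Skipping updating the model." together with the stepwise drops in `loss_scale` (the underlying values are powers of two, e.g. 6.490e+32 = 2^109 and 3.245e+32 = 2^108; intermediates like 5.364e+32 are averages over the 100-batch reporting window) is the signature of dynamic loss scaling in mixed-precision training: on a non-finite gradient the update is skipped and the scale is halved. A hedged sketch of that pattern using `torch.cuda.amp.GradScaler`, illustrative rather than the trainer's exact code; `model(**batch)` returning a scalar loss is an assumption:

```python
import torch

scaler = torch.cuda.amp.GradScaler()  # dynamic loss scaling

def train_step(model, batch, optimizer, max_grad_norm=100.0):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = model(**batch)  # assumed to return a scalar loss
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)  # so the clip threshold applies to true grads
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    if not torch.isfinite(grad_norm):
        # mirrors the trainer's warning seen in this log
        print("The grad norm is nan. Skipping updating the model.")
    scaler.step(optimizer)  # internally skips the step if grads are inf/nan
    scaler.update()         # halves the scale after a skipped step
```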
+[gpub002:0/64] 2023-07-14 08:11:09,674 (trainer:732) INFO: 48epoch:train:3801-3900batch: iter_time=1.016e-04, forward_time=0.144, loss_ctc=66.375, loss_att=50.166, acc=0.713, loss=55.029, backward_time=1.028, grad_norm=168.422, clip=100.000, loss_scale=2.285e+32, optim_step_time=0.182, optim0_lr0=5.191e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 08:13:25,076 (trainer:732) INFO: 48epoch:train:3901-4000batch: iter_time=1.125e-04, forward_time=0.143, loss_ctc=72.317, loss_att=55.151, acc=0.699, loss=60.301, backward_time=1.027, grad_norm=131.187, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.191e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 08:15:40,836 (trainer:732) INFO: 48epoch:train:4001-4100batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=69.182, loss_att=49.859, acc=0.719, loss=55.656, backward_time=1.029, grad_norm=144.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.190e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 08:17:43,061 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-14 08:18:01,046 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 08:18:04,458 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 08:18:04,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-14 08:18:04,464 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 08:23:10,162 (trainer:732) INFO: 48epoch:train:4101-4200batch: iter_time=2.902, forward_time=0.247, loss_ctc=68.952, loss_att=51.746, acc=0.708, loss=56.908, backward_time=1.052, grad_norm=116.379, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.220, optim0_lr0=5.190e-05, train_time=8.986 +[gpub002:0/64] 2023-07-14 08:25:28,112 (trainer:732) INFO: 48epoch:train:4201-4300batch: iter_time=1.154e-04, forward_time=0.148, loss_ctc=75.515, loss_att=51.753, acc=0.724, loss=58.882, backward_time=1.034, grad_norm=126.570, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.189e-05, train_time=2.759 +[gpub002:0/64] 2023-07-14 08:27:44,220 (trainer:732) INFO: 48epoch:train:4301-4400batch: iter_time=1.206e-04, forward_time=0.146, loss_ctc=74.155, loss_att=55.853, acc=0.722, loss=61.344, backward_time=1.031, grad_norm=127.353, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.189e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 08:30:00,444 (trainer:732) INFO: 48epoch:train:4401-4500batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=74.196, loss_att=52.771, acc=0.728, loss=59.198, backward_time=1.032, grad_norm=142.333, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.188e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 08:32:16,273 (trainer:732) INFO: 48epoch:train:4501-4600batch: iter_time=1.142e-04, forward_time=0.145, loss_ctc=69.251, loss_att=50.837, acc=0.715, loss=56.361, 
backward_time=1.029, grad_norm=131.558, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.187e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 08:34:31,949 (trainer:732) INFO: 48epoch:train:4601-4700batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=58.768, loss_att=43.134, acc=0.731, loss=47.825, backward_time=1.028, grad_norm=109.747, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.187e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 08:36:47,789 (trainer:732) INFO: 48epoch:train:4701-4800batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=71.818, loss_att=54.359, acc=0.714, loss=59.597, backward_time=1.029, grad_norm=139.913, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.186e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 08:39:03,632 (trainer:732) INFO: 48epoch:train:4801-4900batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=69.204, loss_att=51.952, acc=0.719, loss=57.128, backward_time=1.029, grad_norm=128.452, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.186e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 08:41:19,530 (trainer:732) INFO: 48epoch:train:4901-5000batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=76.342, loss_att=54.027, acc=0.704, loss=60.721, backward_time=1.030, grad_norm=125.370, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.185e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 08:41:22,274 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-14 08:41:40,677 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 08:41:44,120 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 08:41:44,120 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-14 08:41:44,127 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 08:47:46,295 (trainer:732) INFO: 48epoch:train:5001-5100batch: iter_time=1.324, forward_time=0.145, loss_ctc=64.448, loss_att=47.570, acc=0.724, loss=52.633, backward_time=1.044, grad_norm=141.410, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.185e-05, train_time=7.735 +[gpub002:0/64] 2023-07-14 08:50:02,881 (trainer:732) INFO: 48epoch:train:5101-5200batch: iter_time=8.791e-05, forward_time=0.143, loss_ctc=76.350, loss_att=54.968, acc=0.718, loss=61.383, backward_time=1.029, grad_norm=120.961, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.184e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 08:52:20,062 (trainer:732) INFO: 48epoch:train:5201-5300batch: iter_time=8.431e-05, forward_time=0.145, loss_ctc=78.698, loss_att=54.190, acc=0.731, loss=61.542, backward_time=1.034, grad_norm=145.742, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.184e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 08:54:36,566 (trainer:732) INFO: 
48epoch:train:5301-5400batch: iter_time=8.884e-05, forward_time=0.144, loss_ctc=71.014, loss_att=50.305, acc=0.733, loss=56.518, backward_time=1.031, grad_norm=149.754, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.183e-05, train_time=2.730 +[gpub002:0/64] 2023-07-14 08:56:54,010 (trainer:732) INFO: 48epoch:train:5401-5500batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=66.905, loss_att=50.167, acc=0.717, loss=55.189, backward_time=1.029, grad_norm=107.271, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.182e-05, train_time=2.749 +[gpub002:0/64] 2023-07-14 08:59:13,151 (trainer:732) INFO: 48epoch:train:5501-5600batch: iter_time=1.282e-04, forward_time=0.145, loss_ctc=62.122, loss_att=44.641, acc=0.726, loss=49.885, backward_time=1.028, grad_norm=133.648, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.182e-05, train_time=2.783 +[gpub002:0/64] 2023-07-14 09:01:33,741 (trainer:732) INFO: 48epoch:train:5601-5700batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=69.735, loss_att=53.019, acc=0.721, loss=58.034, backward_time=1.038, grad_norm=127.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.181e-05, train_time=2.812 +[gpub002:0/64] 2023-07-14 09:03:52,868 (trainer:732) INFO: 48epoch:train:5701-5800batch: iter_time=1.326e-04, forward_time=0.146, loss_ctc=69.218, loss_att=50.409, acc=0.725, loss=56.052, backward_time=1.035, grad_norm=115.986, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.181e-05, train_time=2.782 +[gpub002:0/64] 2023-07-14 09:04:41,018 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-14 09:04:59,726 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:05:03,191 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:05:03,191 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-14 09:05:03,197 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 09:11:40,827 (trainer:732) INFO: 48epoch:train:5801-5900batch: iter_time=3.228, forward_time=0.187, loss_ctc=64.451, loss_att=51.138, acc=0.698, loss=55.132, backward_time=1.041, grad_norm=126.975, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.180e-05, train_time=9.358 +[gpub002:0/64] 2023-07-14 09:13:57,967 (trainer:732) INFO: 48epoch:train:5901-6000batch: iter_time=1.175e-04, forward_time=0.144, loss_ctc=74.538, loss_att=51.602, acc=0.714, loss=58.483, backward_time=1.028, grad_norm=138.164, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.180e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 09:16:16,645 (trainer:732) INFO: 48epoch:train:6001-6100batch: iter_time=7.038e-04, forward_time=0.146, loss_ctc=78.354, loss_att=60.750, acc=0.709, loss=66.031, backward_time=1.033, grad_norm=141.686, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.179e-05, train_time=2.773 +[gpub002:0/64] 2023-07-14 09:18:33,621 (trainer:732) INFO: 48epoch:train:6101-6200batch: iter_time=1.237e-04, forward_time=0.146, loss_ctc=69.005, loss_att=49.078, acc=0.732, loss=55.056, backward_time=1.031, grad_norm=107.431, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=5.179e-05, train_time=2.739 +[gpub002:0/64] 2023-07-14 09:20:49,845 (trainer:732) INFO: 48epoch:train:6201-6300batch: iter_time=1.292e-04, forward_time=0.145, loss_ctc=71.735, loss_att=50.224, acc=0.706, loss=56.677, backward_time=1.031, grad_norm=145.134, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.178e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 09:23:24,114 (trainer:732) INFO: 48epoch:train:6301-6400batch: iter_time=1.285e-04, forward_time=0.283, loss_ctc=66.846, loss_att=50.323, acc=0.714, loss=55.280, backward_time=1.045, grad_norm=114.520, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=5.177e-05, train_time=3.085 +[gpub002:0/64] 2023-07-14 09:25:40,456 (trainer:732) INFO: 48epoch:train:6401-6500batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=72.179, loss_att=54.681, acc=0.704, loss=59.930, backward_time=1.029, grad_norm=148.277, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.177e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 09:28:20,556 (trainer:732) INFO: 48epoch:train:6501-6600batch: iter_time=1.396e-04, forward_time=0.146, loss_ctc=67.464, loss_att=48.020, acc=0.724, loss=53.853, backward_time=1.113, grad_norm=125.636, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.176e-05, train_time=3.202 +[gpub002:0/64] 2023-07-14 09:30:11,734 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
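Note on the iter-factory messages: with `--multiple_iterator true` the training data was pre-split into 12 shards (`splits12/.../split.0` through `split.11`), and each `Building Nth iter-factory...` line starts a fresh loader over one shard. That is also why the first batch after each build shows a large `iter_time` (seconds, versus ~1e-4 within a shard): it pays the loader spin-up cost. A conceptual sketch of this epoch structure; names are illustrative, not the ESPnet API, and per-epoch shard shuffling is assumed:

```python
import random

# Conceptual sketch of a multiple-iterator epoch: one iter-factory
# (data loader) per pre-split shard, built lazily so only one shard's
# index is resident at a time.
def epoch_batches(build_loader_for_shard, num_shards=12, seed=0):
    shard_order = list(range(num_shards))
    random.Random(seed).shuffle(shard_order)  # assumed per-epoch shuffle
    for shard in shard_order:
        loader = build_loader_for_shard(shard)  # "Building Nth iter-factory..."
        yield from loader  # first batch pays the spin-up cost (large iter_time)
```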
+[gpub002:0/64] 2023-07-14 09:30:29,893 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:30:33,364 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:30:33,364 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-14 09:30:33,370 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 09:34:29,820 (trainer:732) INFO: 48epoch:train:6601-6700batch: iter_time=2.210, forward_time=0.145, loss_ctc=65.650, loss_att=48.209, acc=0.712, loss=53.441, backward_time=1.036, grad_norm=123.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.176e-05, train_time=7.385 +[gpub002:0/64] 2023-07-14 09:36:46,637 (trainer:732) INFO: 48epoch:train:6701-6800batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=68.915, loss_att=49.606, acc=0.726, loss=55.399, backward_time=1.031, grad_norm=124.451, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.175e-05, train_time=2.736 +[gpub002:0/64] 2023-07-14 09:39:03,225 (trainer:732) INFO: 48epoch:train:6801-6900batch: iter_time=1.266e-04, forward_time=0.146, loss_ctc=79.366, loss_att=60.971, acc=0.714, loss=66.490, backward_time=1.031, grad_norm=177.915, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.175e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 09:41:26,301 (trainer:732) INFO: 48epoch:train:6901-7000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=71.703, loss_att=47.655, acc=0.738, loss=54.869, backward_time=1.033, grad_norm=118.979, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.174e-05, train_time=2.861 +[gpub002:0/64] 2023-07-14 09:43:47,692 (trainer:732) INFO: 48epoch:train:7001-7100batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=74.135, loss_att=51.319, acc=0.722, loss=58.164, backward_time=1.037, grad_norm=130.583, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.174e-05, train_time=2.828 +[gpub002:0/64] 2023-07-14 09:46:22,408 (trainer:732) INFO: 48epoch:train:7101-7200batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=61.004, loss_att=45.940, acc=0.727, loss=50.459, backward_time=1.063, grad_norm=116.241, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.173e-05, train_time=3.094 +[gpub002:0/64] 2023-07-14 09:48:38,387 (trainer:732) INFO: 48epoch:train:7201-7300batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=71.858, loss_att=52.784, acc=0.722, loss=58.507, backward_time=1.030, grad_norm=147.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.172e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 09:51:09,299 (trainer:732) INFO: 48epoch:train:7301-7400batch: iter_time=1.101e-04, forward_time=0.147, loss_ctc=69.932, loss_att=48.151, acc=0.732, loss=54.685, backward_time=1.064, grad_norm=130.713, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, 
optim0_lr0=5.172e-05, train_time=3.018 +[gpub002:0/64] 2023-07-14 09:53:26,312 (trainer:732) INFO: 48epoch:train:7401-7500batch: iter_time=1.054e-04, forward_time=0.146, loss_ctc=67.229, loss_att=50.273, acc=0.714, loss=55.360, backward_time=1.031, grad_norm=147.697, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.171e-05, train_time=2.740 +[gpub002:0/64] 2023-07-14 09:53:28,551 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-14 09:53:46,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:53:50,086 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:53:50,086 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 09:53:50,092 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 10:00:46,448 (trainer:732) INFO: 48epoch:train:7501-7600batch: iter_time=1.330, forward_time=0.146, loss_ctc=64.829, loss_att=48.046, acc=0.710, loss=53.081, backward_time=1.043, grad_norm=121.945, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.171e-05, train_time=8.803 +[gpub002:0/64] 2023-07-14 10:03:03,225 (trainer:732) INFO: 48epoch:train:7601-7700batch: iter_time=1.167e-04, forward_time=0.145, loss_ctc=73.763, loss_att=54.433, acc=0.716, loss=60.232, backward_time=1.031, grad_norm=121.388, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.170e-05, train_time=2.735 +[gpub002:0/64] 2023-07-14 10:05:19,335 (trainer:732) INFO: 48epoch:train:7701-7800batch: iter_time=1.329e-04, forward_time=0.145, loss_ctc=77.979, loss_att=56.628, acc=0.718, loss=63.034, backward_time=1.030, grad_norm=145.939, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.170e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 10:07:35,239 (trainer:732) INFO: 48epoch:train:7801-7900batch: iter_time=1.459e-04, forward_time=0.144, loss_ctc=70.685, loss_att=50.240, acc=0.729, loss=56.374, backward_time=1.030, grad_norm=121.249, clip=100.000, loss_scale=2.564e+32, optim_step_time=0.182, optim0_lr0=5.169e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 10:10:05,010 (trainer:732) INFO: 48epoch:train:7901-8000batch: iter_time=1.328e-04, forward_time=0.239, loss_ctc=67.160, loss_att=50.782, acc=0.700, loss=55.695, backward_time=1.051, grad_norm=128.964, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.169e-05, train_time=2.995 +[gpub002:0/64] 2023-07-14 10:12:20,278 (trainer:732) INFO: 48epoch:train:8001-8100batch: iter_time=1.339e-04, forward_time=0.144, loss_ctc=63.723, loss_att=47.622, acc=0.712, loss=52.453, backward_time=1.025, grad_norm=112.150, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.168e-05, train_time=2.705 +[gpub002:0/64] 2023-07-14 10:14:36,085 (trainer:732) INFO: 48epoch:train:8101-8200batch: iter_time=1.193e-04, forward_time=0.144, loss_ctc=74.529, 
loss_att=54.062, acc=0.708, loss=60.202, backward_time=1.027, grad_norm=108.241, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.167e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 10:16:52,011 (trainer:732) INFO: 48epoch:train:8201-8300batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=68.909, loss_att=49.305, acc=0.729, loss=55.186, backward_time=1.028, grad_norm=144.843, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.167e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 10:17:38,915 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-14 10:17:57,252 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 10:18:00,656 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 10:18:00,656 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-14 10:18:00,663 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 10:22:44,622 (trainer:732) INFO: 48epoch:train:8301-8400batch: iter_time=2.087, forward_time=0.144, loss_ctc=66.298, loss_att=48.900, acc=0.708, loss=54.119, backward_time=1.041, grad_norm=129.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.166e-05, train_time=7.052 +[gpub002:0/64] 2023-07-14 10:25:04,181 (trainer:732) INFO: 48epoch:train:8401-8500batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=79.695, loss_att=59.887, acc=0.715, loss=65.829, backward_time=1.030, grad_norm=136.298, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.166e-05, train_time=2.791 +[gpub002:0/64] 2023-07-14 10:27:20,855 (trainer:732) INFO: 48epoch:train:8501-8600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=73.605, loss_att=52.399, acc=0.724, loss=58.761, backward_time=1.029, grad_norm=129.008, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.165e-05, train_time=2.733 +[gpub002:0/64] 2023-07-14 10:29:36,805 (trainer:732) INFO: 48epoch:train:8601-8700batch: iter_time=1.263e-04, forward_time=0.145, loss_ctc=73.931, loss_att=52.596, acc=0.720, loss=58.997, backward_time=1.027, grad_norm=123.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.165e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 10:31:52,421 (trainer:732) INFO: 48epoch:train:8701-8800batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=62.565, loss_att=48.800, acc=0.699, loss=52.930, backward_time=1.025, grad_norm=111.238, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.164e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 10:34:08,232 (trainer:732) INFO: 48epoch:train:8801-8900batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=71.599, loss_att=54.096, acc=0.705, loss=59.347, backward_time=1.027, grad_norm=114.515, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.164e-05, train_time=2.716 +[gpub002:0/64] 
2023-07-14 10:36:23,781 (trainer:732) INFO: 48epoch:train:8901-9000batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=67.382, loss_att=47.060, acc=0.724, loss=53.156, backward_time=1.026, grad_norm=109.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.163e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 10:38:39,320 (trainer:732) INFO: 48epoch:train:9001-9100batch: iter_time=1.261e-04, forward_time=0.144, loss_ctc=65.844, loss_att=47.748, acc=0.721, loss=53.177, backward_time=1.027, grad_norm=117.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.163e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 10:40:20,761 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-14 10:40:38,784 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 10:40:42,509 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 10:40:42,509 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-14 10:40:42,516 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 10:44:51,880 (trainer:732) INFO: 48epoch:train:9101-9200batch: iter_time=2.166, forward_time=0.173, loss_ctc=76.442, loss_att=55.319, acc=0.703, loss=61.656, backward_time=1.038, grad_norm=122.819, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.162e-05, train_time=7.451 +[gpub002:0/64] 2023-07-14 10:47:08,550 (trainer:732) INFO: 48epoch:train:9201-9300batch: iter_time=1.199e-04, forward_time=0.144, loss_ctc=68.780, loss_att=49.932, acc=0.724, loss=55.586, backward_time=1.031, grad_norm=107.542, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.161e-05, train_time=2.733 +[gpub002:0/64] 2023-07-14 10:49:26,299 (trainer:732) INFO: 48epoch:train:9301-9400batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=79.867, loss_att=61.875, acc=0.714, loss=67.273, backward_time=1.031, grad_norm=156.270, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.161e-05, train_time=2.755 +[gpub002:0/64] 2023-07-14 10:51:42,933 (trainer:732) INFO: 48epoch:train:9401-9500batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=72.470, loss_att=48.680, acc=0.737, loss=55.817, backward_time=1.029, grad_norm=122.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.160e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 10:53:58,937 (trainer:732) INFO: 48epoch:train:9501-9600batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=70.022, loss_att=50.699, acc=0.724, loss=56.496, backward_time=1.029, grad_norm=132.905, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.160e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 10:56:14,682 (trainer:732) INFO: 48epoch:train:9601-9700batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=60.015, loss_att=45.859, acc=0.727, loss=50.106, backward_time=1.028, 
grad_norm=143.949, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.159e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 10:58:33,862 (trainer:732) INFO: 48epoch:train:9701-9800batch: iter_time=1.277e-04, forward_time=0.146, loss_ctc=70.164, loss_att=53.081, acc=0.720, loss=58.206, backward_time=1.036, grad_norm=114.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.159e-05, train_time=2.783 +[gpub002:0/64] 2023-07-14 11:00:52,286 (trainer:732) INFO: 48epoch:train:9801-9900batch: iter_time=1.221e-04, forward_time=0.147, loss_ctc=67.592, loss_att=47.935, acc=0.733, loss=53.832, backward_time=1.034, grad_norm=113.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.158e-05, train_time=2.768 +[gpub002:0/64] 2023-07-14 11:03:07,869 (trainer:732) INFO: 48epoch:train:9901-10000batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=66.172, loss_att=49.889, acc=0.716, loss=54.774, backward_time=1.028, grad_norm=112.218, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.158e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 11:17:21,394 (trainer:338) INFO: 48epoch results: [train] iter_time=0.297, forward_time=0.153, loss_ctc=70.855, loss_att=51.822, acc=0.716, loss=57.532, backward_time=1.033, grad_norm=130.378, clip=100.000, loss_scale=2.805e+32, optim_step_time=0.183, optim0_lr0=5.185e-05, train_time=3.465, time=4 hours, 49 minutes and 0.35 seconds, total_count=450000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=42.674, cer_ctc=0.250, loss_att=37.042, acc=0.682, cer=0.410, wer=0.996, loss=38.731, time=7 minutes and 58.92 seconds, total_count=46046, gpu_max_cached_mem_GB=37.574, [att_plot] time=6 minutes and 0.19 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-14 11:17:37,205 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-14 11:17:37,216 (trainer:272) INFO: 49/50epoch started. Estimated time to finish: 10 hours, 12 minutes and 51.12 seconds +[gpub002:0/64] 2023-07-14 11:17:37,220 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
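Note on the learning rate: the config name encodes a peak lr of 2.5e-4 with 10k warmup steps, and the slow decay of `optim0_lr0` across these lines (~5.2e-05 by epoch 48) is consistent with espnet2's `WarmupLR` scheduler, which warms up to the base lr and then decays proportionally to step^-0.5. A sketch of that rule; the step count passed below is illustrative, since the log reports only the resulting lr:

```python
def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10_000) -> float:
    # WarmupLR rule: ramp to base_lr at `warmup_steps`, then ~ step**-0.5 decay.
    return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

print(warmup_lr(10_000))   # 2.5e-4 at the end of warmup
print(warmup_lr(230_000))  # ~5.2e-05, the regime seen in this log
```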
+[gpub002:0/64] 2023-07-14 11:17:55,035 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 11:17:58,436 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 11:17:58,436 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-14 11:17:58,442 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 11:23:36,393 (trainer:732) INFO: 49epoch:train:1-100batch: iter_time=1.992, forward_time=0.180, loss_ctc=75.424, loss_att=56.205, acc=0.707, loss=61.970, backward_time=1.067, grad_norm=126.161, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.157e-05, train_time=7.183 +[gpub002:0/64] 2023-07-14 11:25:52,971 (trainer:732) INFO: 49epoch:train:101-200batch: iter_time=1.338e-04, forward_time=0.145, loss_ctc=78.109, loss_att=58.369, acc=0.696, loss=64.291, backward_time=1.030, grad_norm=156.563, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.156e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 11:28:10,045 (trainer:732) INFO: 49epoch:train:201-300batch: iter_time=1.411e-04, forward_time=0.145, loss_ctc=71.274, loss_att=53.833, acc=0.706, loss=59.065, backward_time=1.031, grad_norm=117.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.156e-05, train_time=2.741 +[gpub002:0/64] 2023-07-14 11:30:33,978 (trainer:732) INFO: 49epoch:train:301-400batch: iter_time=1.382e-04, forward_time=0.143, loss_ctc=82.935, loss_att=67.130, acc=0.686, loss=71.872, backward_time=1.041, grad_norm=143.181, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.155e-05, train_time=2.878 +[gpub002:0/64] 2023-07-14 11:33:11,725 (trainer:732) INFO: 49epoch:train:401-500batch: iter_time=1.370e-04, forward_time=0.144, loss_ctc=67.558, loss_att=49.800, acc=0.725, loss=55.128, backward_time=1.047, grad_norm=137.364, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.155e-05, train_time=3.155 +[gpub002:0/64] 2023-07-14 11:35:47,703 (trainer:732) INFO: 49epoch:train:501-600batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=67.201, loss_att=46.260, acc=0.720, loss=52.542, backward_time=1.045, grad_norm=114.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.154e-05, train_time=3.119 +[gpub002:0/64] 2023-07-14 11:38:18,577 (trainer:732) INFO: 49epoch:train:601-700batch: iter_time=1.325e-04, forward_time=0.150, loss_ctc=70.668, loss_att=51.546, acc=0.714, loss=57.283, backward_time=1.040, grad_norm=119.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.154e-05, train_time=3.017 +[gpub002:0/64] 2023-07-14 11:40:45,549 (trainer:732) INFO: 49epoch:train:701-800batch: iter_time=1.343e-04, forward_time=0.144, loss_ctc=61.423, loss_att=43.823, acc=0.717, loss=49.103, backward_time=1.037, grad_norm=107.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.153e-05, 
train_time=2.939 +[gpub002:0/64] 2023-07-14 11:41:43,214 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-14 11:42:01,107 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 11:42:04,459 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 11:42:04,459 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-14 11:42:04,480 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 11:48:17,184 (trainer:732) INFO: 49epoch:train:801-900batch: iter_time=2.782, forward_time=0.203, loss_ctc=83.766, loss_att=63.502, acc=0.709, loss=69.582, backward_time=1.053, grad_norm=161.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.153e-05, train_time=9.032 +[gpub002:0/64] 2023-07-14 11:50:46,902 (trainer:732) INFO: 49epoch:train:901-1000batch: iter_time=9.525e-05, forward_time=0.144, loss_ctc=74.996, loss_att=53.416, acc=0.702, loss=59.890, backward_time=1.043, grad_norm=131.720, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.152e-05, train_time=2.994 +[gpub002:0/64] 2023-07-14 11:53:03,014 (trainer:732) INFO: 49epoch:train:1001-1100batch: iter_time=8.920e-05, forward_time=0.144, loss_ctc=71.753, loss_att=56.326, acc=0.702, loss=60.954, backward_time=1.032, grad_norm=130.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.152e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 11:55:18,875 (trainer:732) INFO: 49epoch:train:1101-1200batch: iter_time=1.102e-04, forward_time=0.143, loss_ctc=79.119, loss_att=61.363, acc=0.706, loss=66.690, backward_time=1.030, grad_norm=115.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.151e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 11:57:34,399 (trainer:732) INFO: 49epoch:train:1201-1300batch: iter_time=1.012e-04, forward_time=0.144, loss_ctc=72.908, loss_att=53.208, acc=0.712, loss=59.118, backward_time=1.028, grad_norm=115.476, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.150e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 11:59:49,721 (trainer:732) INFO: 49epoch:train:1301-1400batch: iter_time=1.097e-04, forward_time=0.142, loss_ctc=61.698, loss_att=43.601, acc=0.720, loss=49.030, backward_time=1.027, grad_norm=118.028, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.150e-05, train_time=2.706 +[gpub002:0/64] 2023-07-14 12:02:24,426 (trainer:732) INFO: 49epoch:train:1401-1500batch: iter_time=0.010, forward_time=0.243, loss_ctc=69.799, loss_att=51.631, acc=0.718, loss=57.082, backward_time=1.055, grad_norm=137.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.201, optim0_lr0=5.149e-05, train_time=3.093 +[gpub002:0/64] 2023-07-14 12:04:40,243 (trainer:732) INFO: 49epoch:train:1501-1600batch: iter_time=1.270e-04, forward_time=0.145, loss_ctc=62.191, loss_att=43.167, acc=0.720, loss=48.874, 
backward_time=1.028, grad_norm=110.624, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.149e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 12:06:21,070 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-14 12:06:39,230 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 12:06:42,623 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 12:06:42,623 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-14 12:06:42,629 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 12:10:28,549 (trainer:732) INFO: 49epoch:train:1601-1700batch: iter_time=1.995, forward_time=0.145, loss_ctc=86.888, loss_att=63.993, acc=0.702, loss=70.861, backward_time=1.042, grad_norm=144.375, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.148e-05, train_time=6.966 +[gpub002:0/64] 2023-07-14 12:12:46,571 (trainer:732) INFO: 49epoch:train:1701-1800batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=72.288, loss_att=55.796, acc=0.708, loss=60.743, backward_time=1.033, grad_norm=127.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.148e-05, train_time=2.760 +[gpub002:0/64] 2023-07-14 12:15:04,541 (trainer:732) INFO: 49epoch:train:1801-1900batch: iter_time=1.254e-04, forward_time=0.146, loss_ctc=72.461, loss_att=50.750, acc=0.721, loss=57.264, backward_time=1.031, grad_norm=124.107, clip=100.000, loss_scale=5.127e+32, optim_step_time=0.182, optim0_lr0=5.147e-05, train_time=2.759 +[gpub002:0/64] 2023-07-14 12:17:21,880 (trainer:732) INFO: 49epoch:train:1901-2000batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=77.931, loss_att=60.068, acc=0.711, loss=65.427, backward_time=1.031, grad_norm=134.506, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.147e-05, train_time=2.747 +[gpub002:0/64] 2023-07-14 12:19:44,196 (trainer:732) INFO: 49epoch:train:2001-2100batch: iter_time=1.327e-04, forward_time=0.145, loss_ctc=74.295, loss_att=58.950, acc=0.728, loss=63.554, backward_time=1.047, grad_norm=139.921, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.146e-05, train_time=2.846 +[gpub002:0/64] 2023-07-14 12:21:47,756 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
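Note on the batch sampler: `UnsortedBatchSampler(N-batch=37994, batch_size=128, ...)` describes fixed-size batching in key-file order (no length sorting), with the remainder spread over a few batches, which is why the summary reports `mean=128.0, min=128, max=129`. A conceptual sketch of that batching, not the exact ESPnet implementation:

```python
def unsorted_batches(keys, batch_size=128):
    # Cut keys into len(keys)//batch_size batches in file order; the
    # remainder is spread over the first batches, so a few batches hold
    # one extra item (hence min=128, max=129, mean=128.0 in the log).
    n_batch = max(len(keys) // batch_size, 1)
    base, extra = divmod(len(keys), n_batch)
    batches, start = [], 0
    for i in range(n_batch):
        size = base + (1 if i < extra else 0)
        batches.append(keys[start:start + size])
        start += size
    return batches
```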
+[gpub002:0/64] 2023-07-14 12:22:28,541 (trainer:732) INFO: 49epoch:train:2101-2200batch: iter_time=2.902e-04, forward_time=0.195, loss_ctc=66.147, loss_att=47.940, acc=0.731, loss=53.402, backward_time=1.122, grad_norm=127.949, clip=100.000, loss_scale=5.497e+32, optim_step_time=0.196, optim0_lr0=5.146e-05, train_time=3.286 +[gpub002:0/64] 2023-07-14 12:24:44,739 (trainer:732) INFO: 49epoch:train:2201-2300batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=63.495, loss_att=43.732, acc=0.732, loss=49.661, backward_time=1.028, grad_norm=107.717, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.145e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 12:27:00,588 (trainer:732) INFO: 49epoch:train:2301-2400batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=70.905, loss_att=53.186, acc=0.722, loss=58.502, backward_time=1.028, grad_norm=119.707, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.144e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 12:29:16,288 (trainer:732) INFO: 49epoch:train:2401-2500batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=71.254, loss_att=49.884, acc=0.724, loss=56.295, backward_time=1.028, grad_norm=137.105, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.144e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 12:29:36,316 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-14 12:29:55,262 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 12:29:58,726 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 12:29:58,726 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-14 12:29:58,784 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 12:36:52,242 (trainer:732) INFO: 49epoch:train:2501-2600batch: iter_time=3.018, forward_time=0.183, loss_ctc=77.934, loss_att=56.931, acc=0.709, loss=63.232, backward_time=1.042, grad_norm=160.805, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.143e-05, train_time=9.116 +[gpub002:0/64] 2023-07-14 12:39:09,252 (trainer:732) INFO: 49epoch:train:2601-2700batch: iter_time=1.261e-04, forward_time=0.144, loss_ctc=76.698, loss_att=56.198, acc=0.715, loss=62.348, backward_time=1.031, grad_norm=159.881, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.143e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 12:41:25,308 (trainer:732) INFO: 49epoch:train:2701-2800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=70.984, loss_att=50.376, acc=0.725, loss=56.558, backward_time=1.031, grad_norm=114.170, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.142e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 12:43:41,674 (trainer:732) INFO: 49epoch:train:2801-2900batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=80.056, loss_att=64.744, acc=0.711, loss=69.337, 
backward_time=1.033, grad_norm=127.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.142e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 12:45:57,246 (trainer:732) INFO: 49epoch:train:2901-3000batch: iter_time=1.143e-04, forward_time=0.144, loss_ctc=67.599, loss_att=50.304, acc=0.735, loss=55.492, backward_time=1.027, grad_norm=117.099, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.141e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 12:48:13,149 (trainer:732) INFO: 49epoch:train:3001-3100batch: iter_time=1.120e-04, forward_time=0.146, loss_ctc=64.905, loss_att=44.347, acc=0.736, loss=50.514, backward_time=1.030, grad_norm=130.061, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.141e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 12:50:30,467 (trainer:732) INFO: 49epoch:train:3101-3200batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=68.429, loss_att=50.737, acc=0.731, loss=56.045, backward_time=1.028, grad_norm=111.368, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.140e-05, train_time=2.746 +[gpub002:0/64] 2023-07-14 12:52:46,238 (trainer:732) INFO: 49epoch:train:3201-3300batch: iter_time=1.227e-04, forward_time=0.144, loss_ctc=62.564, loss_att=44.638, acc=0.725, loss=50.016, backward_time=1.028, grad_norm=110.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.140e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 12:53:46,523 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-14 12:54:05,029 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 12:54:08,395 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 12:54:08,395 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 12:54:08,401 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 12:59:16,516 (trainer:732) INFO: 49epoch:train:3301-3400batch: iter_time=1.682, forward_time=0.171, loss_ctc=82.313, loss_att=58.467, acc=0.718, loss=65.621, backward_time=1.042, grad_norm=192.097, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.139e-05, train_time=7.805 +[gpub002:0/64] 2023-07-14 13:02:14,955 (trainer:732) INFO: 49epoch:train:3401-3500batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=72.811, loss_att=56.064, acc=0.705, loss=61.088, backward_time=1.086, grad_norm=128.058, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.138e-05, train_time=3.569 +[gpub002:0/64] 2023-07-14 13:05:10,836 (trainer:732) INFO: 49epoch:train:3501-3600batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=70.741, loss_att=51.039, acc=0.720, loss=56.950, backward_time=1.079, grad_norm=144.537, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.138e-05, train_time=3.517 +[gpub002:0/64] 2023-07-14 13:07:59,359 (trainer:732) INFO: 
49epoch:train:3601-3700batch: iter_time=1.242e-04, forward_time=0.144, loss_ctc=82.023, loss_att=65.980, acc=0.690, loss=70.793, backward_time=1.065, grad_norm=119.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.137e-05, train_time=3.370 +[gpub002:0/64] 2023-07-14 13:10:33,803 (trainer:732) INFO: 49epoch:train:3701-3800batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=68.382, loss_att=49.269, acc=0.731, loss=55.003, backward_time=1.040, grad_norm=119.342, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.137e-05, train_time=3.089 +[gpub002:0/64] 2023-07-14 13:13:52,522 (trainer:732) INFO: 49epoch:train:3801-3900batch: iter_time=1.390e-04, forward_time=0.146, loss_ctc=70.071, loss_att=52.760, acc=0.711, loss=57.953, backward_time=1.081, grad_norm=133.837, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.136e-05, train_time=3.974 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** STEP 2147805.0 ON gpub002 CANCELLED AT 2023-07-14T13:15:07 DUE TO TIME LIMIT *** diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.4.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.4.log new file mode 100644 index 0000000000000000000000000000000000000000..eb703b4bb0ecf382b5cb4706b9999ea940688a82 --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.4.log @@ -0,0 +1,4562 @@ +# Running on gpub007.delta.ncsa.illinois.edu +# Started at Mon Jul 10 05:11:28 CDT 2023 +# SLURMD_NODENAME=gpub007 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2141292 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2141292 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[007,021-024,037,050,052-054,066,073-075,078,091]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[007,021-024,037,050,052-054,066,073-075,078,091]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2197444 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub007 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4
/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +ibuted true --dist_launcher slurm --dist_init_method 
file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 
--multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file 
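Note on the command above: each --train_data_path_and_name_and_type / --valid_data_path_and_name_and_type flag packs three comma-separated fields (a file path, the batch key it feeds such as speech, text, text_prev, or text_ctc, and a loader type such as kaldi_ark or text), and each is paired with a matching --*_shape_file. A minimal sketch of that convention; parse_data_triplet is a hypothetical helper for illustration, not ESPnet code:

```python
# Hypothetical helper illustrating the "path,name,type" flag convention
# used by espnet2.bin.s2t_train above; not part of ESPnet itself.
def parse_data_triplet(arg: str) -> dict:
    path, name, dtype = arg.split(",")
    return {"path": path, "name": name, "type": dtype}

# The validation speech input from the command above:
print(parse_data_triplet("dump/raw/dev/wav.scp,speech,kaldi_ark"))
# {'path': 'dump/raw/dev/wav.scp', 'name': 'speech', 'type': 'kaldi_ark'}
```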
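The INFO lines that follow record the distributed setup: --dist_init_method points every rank at a shared .dist_init_* file, and with 16 nodes at --ngpu 4 each, 64 processes rendezvous there; PyTorch 1.13 logs a store-based barrier from distributed_c10d while the ranks check in. A minimal sketch of that file-based rendezvous, assuming a shared filesystem; the path and the rank derivation are illustrative, not ESPnet's launcher code:

```python
# Minimal sketch of a file:// rendezvous like the one in --dist_init_method.
# The init file path is illustrative; rank handling in ESPnet's
# multiprocessing_distributed launcher is more involved than this.
import os
import torch.distributed as dist

rank = int(os.environ.get("SLURM_PROCID", "0"))  # one rank per process
dist.init_process_group(
    backend="nccl",
    init_method="file:///shared/scratch/.dist_init_example",
    world_size=64,  # 16 nodes x 4 GPUs, matching the log below
    rank=rank,
)
# In torch 1.13, init_process_group runs the store-based barrier that
# produces the "store_based_barrier_key" INFO lines below.
```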
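One dimension in the model structure printed below is worth decoding: the Linear(in_features=19456) inside Conv2dSubsampling is 1024 channels times the 19 frequency bins that remain after two unpadded kernel-3, stride-2 convolutions are applied to the 80 log-mel bins. A quick check:

```python
# Verify in_features=19456 for the Conv2dSubsampling output projection below:
# two Conv2d(kernel_size=3, stride=2) layers shrink 80 mel bins to 19, and
# 1024 channels * 19 bins = 19456.
def conv_out(n: int, kernel: int = 3, stride: int = 2) -> int:
    return (n - kernel) // stride + 1  # unpadded conv output length

freq_bins = conv_out(conv_out(80))  # 80 -> 39 -> 19
print(freq_bins, 1024 * freq_bins)  # 19 19456
```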
+[gpub007:0/64] 2023-07-10 05:14:04,885 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub007:0/64] 2023-07-10 05:14:06,820 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub007:0/64] 2023-07-10 05:14:06,849 (s2t:483) INFO: Vocabulary size: 50002
+[gpub007:0/64] 2023-07-10 05:14:20,881 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub007:0/64] 2023-07-10 05:14:20,890 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) +
(feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): 
MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + 
(norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) 
+ (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 888.51 M + Number of trainable parameters: 888.51 M (100.0%) + Size: 3.55 GB + Type: torch.float32 +[gpub007:0/64] 2023-07-10 05:14:20,890 (abs_task:1205) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.00025 + lr: 2.5e-08 + maximize: False + weight_decay: 0.0 +) +[gpub007:0/64] 2023-07-10 05:14:20,890 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000) +[gpub007:0/64] 2023-07-10 05:14:20,903 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml +[gpub007:0/64] 2023-07-10 05:14:21,611 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth +[gpub007:0/64] 2023-07-10 05:14:30,511 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 05:14:30,719 (abs_task:1570) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 05:14:30,719 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub007:0/64] 2023-07-10 05:14:30,788 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129 +[gpub007:0/64] 2023-07-10 05:14:31,269 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 05:14:31,592 (abs_task:1570) INFO: [plot_att] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: 
{"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 05:14:31,592 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub007:0/64] 2023-07-10 05:14:31,592 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1 +[gpub007:0/64] 2023-07-10 05:15:01,949 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth +gpub007:2197523:2197523 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:2197523:2197523 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:2197523:2197523 [0] NCCL INFO cudaDriverVersion 12010 +NCCL version 2.14.3+cuda11.7 +[gpub007:0/64] 2023-07-10 05:15:07,242 (trainer:284) INFO: 31/50epoch started +[gpub007:0/64] 2023-07-10 05:15:07,288 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub007:0/64] 2023-07-10 05:15:24,567 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 05:15:27,864 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 05:15:27,864 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-10 05:15:27,870 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +gpub053:1853280:1853280 [2] NCCL INFO cudaDriverVersion 12010 +gpub053:1853280:1853280 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1853280:1853280 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1853280:1853407 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1853280:1853407 [2] NCCL INFO Using network IB +gpub053:1853280:1853407 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub053:1853280:1853407 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub053:1853280:1853407 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub053:1853280:1853407 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub053:1853280:1853407 [2] NCCL INFO Connected all rings +gpub053:1853280:1853407 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub053:1853280:1853407 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub053:1853280:1853407 [2] NCCL INFO Connected all trees +gpub053:1853280:1853407 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1853280:1853407 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1853280:1853407 [2] NCCL INFO comm 0x9f94540 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub053:1853278:1853278 [0] NCCL INFO cudaDriverVersion 12010 +gpub053:1853278:1853278 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1853278:1853278 [0] NCCL INFO 
NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1853278:1853410 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1853278:1853410 [0] NCCL INFO Using network IB +gpub053:1853278:1853410 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub053:1853278:1853410 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub053:1853278:1853410 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub053:1853278:1853410 [0] NCCL INFO Connected all rings +gpub053:1853278:1853410 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Connected all trees +gpub053:1853278:1853410 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1853278:1853410 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1853278:1853410 [0] NCCL INFO comm 0x1b893b10 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub075:1498544:1498544 [1] NCCL INFO cudaDriverVersion 12010 +gpub075:1498544:1498544 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:1498544:1498544 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:1498544:1498668 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:1498544:1498668 [1] NCCL INFO Using network IB +gpub075:1498544:1498668 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub075:1498544:1498668 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub075:1498544:1498668 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub075:1498544:1498668 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub075:1498544:1498668 [1] NCCL INFO Connected all rings +gpub075:1498544:1498668 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub075:1498544:1498668 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub075:1498544:1498668 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub075:1498544:1498668 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub075:1498544:1498668 [1] NCCL INFO Connected all trees +gpub075:1498544:1498668 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub075:1498544:1498668 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:1498544:1498668 [1] NCCL INFO comm 0x50791d40 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub053:1853279:1853279 [1] NCCL INFO cudaDriverVersion 12010 +gpub053:1853279:1853279 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1853279:1853279 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using 
internal implementation +gpub053:1853279:1853408 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1853279:1853408 [1] NCCL INFO Using network IB +gpub053:1853279:1853408 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub053:1853279:1853408 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub053:1853279:1853408 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub053:1853279:1853408 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub053:1853279:1853408 [1] NCCL INFO Connected all rings +gpub053:1853279:1853408 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub053:1853279:1853408 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub053:1853279:1853408 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub053:1853279:1853408 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub053:1853279:1853408 [1] NCCL INFO Connected all trees +gpub053:1853279:1853408 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1853279:1853408 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1853279:1853408 [1] NCCL INFO comm 0x8cc80f50 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub022:3643869:3643869 [0] NCCL INFO cudaDriverVersion 12010 +gpub022:3643869:3643869 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:3643869:3643869 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:3643869:3644003 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:3643869:3644003 [0] NCCL INFO Using network IB +gpub022:3643869:3644003 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub022:3643869:3644003 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub022:3643869:3644003 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub022:3643869:3644003 [0] NCCL INFO Connected all rings +gpub022:3643869:3644003 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Connected all trees +gpub022:3643869:3644003 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:3643869:3644003 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:3643869:3644003 [0] NCCL INFO comm 0xb08c9310 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:3643870:3643870 [1] NCCL INFO cudaDriverVersion 12010 +gpub022:3643870:3643870 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:3643870:3643870 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:3643870:3644001 [1] NCCL INFO NET/IB : 
Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:3643870:3644001 [1] NCCL INFO Using network IB +gpub022:3643870:3644001 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub022:3643870:3644001 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub022:3643870:3644001 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub022:3643870:3644001 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub022:3643870:3644001 [1] NCCL INFO Connected all rings +gpub022:3643870:3644001 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub022:3643870:3644001 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub022:3643870:3644001 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub022:3643870:3644001 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub022:3643870:3644001 [1] NCCL INFO Connected all trees +gpub022:3643870:3644001 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:3643870:3644001 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:3643870:3644001 [1] NCCL INFO comm 0x97b1940 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub022:3643871:3643871 [2] NCCL INFO cudaDriverVersion 12010 +gpub022:3643871:3643871 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:3643871:3643871 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:3643871:3644002 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:3643871:3644002 [2] NCCL INFO Using network IB +gpub022:3643871:3644002 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub022:3643871:3644002 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub022:3643871:3644002 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub022:3643871:3644002 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub022:3643871:3644002 [2] NCCL INFO Connected all rings +gpub022:3643871:3644002 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub022:3643871:3644002 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub022:3643871:3644002 [2] NCCL INFO Connected all trees +gpub022:3643871:3644002 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:3643871:3644002 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:3643871:3644002 [2] NCCL INFO comm 0x50bbe140 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub075:1498543:1498543 [0] NCCL INFO cudaDriverVersion 12010 +gpub075:1498543:1498543 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:1498543:1498543 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:1498543:1498669 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:1498543:1498669 [0] NCCL INFO Using network IB +gpub075:1498543:1498669 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub075:1498543:1498669 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub075:1498543:1498669 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC 
+gpub075:1498543:1498669 [0] NCCL INFO Connected all rings +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Connected all trees +gpub075:1498543:1498669 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub075:1498543:1498669 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:1498543:1498669 [0] NCCL INFO comm 0x50bdf1e0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub075:1498546:1498546 [3] NCCL INFO cudaDriverVersion 12010 +gpub075:1498546:1498546 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:1498546:1498546 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:1498546:1498670 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:1498546:1498670 [3] NCCL INFO Using network IB +gpub075:1498546:1498670 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub075:1498546:1498670 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub075:1498546:1498670 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub075:1498546:1498670 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub075:1498546:1498670 [3] NCCL INFO Connected all rings +gpub075:1498546:1498670 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub075:1498546:1498670 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub075:1498546:1498670 [3] NCCL INFO Connected all trees +gpub075:1498546:1498670 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub075:1498546:1498670 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:1498546:1498670 [3] NCCL INFO comm 0x93fedd0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub022:3643872:3643872 [3] NCCL INFO cudaDriverVersion 12010 +gpub022:3643872:3643872 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:3643872:3643872 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:3643872:3644000 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:3643872:3644000 [3] NCCL INFO Using network IB +gpub022:3643872:3644000 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub022:3643872:3644000 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub022:3643872:3644000 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub022:3643872:3644000 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub022:3643872:3644000 [3] NCCL INFO Connected all rings +gpub022:3643872:3644000 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub022:3643872:3644000 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub022:3643872:3644000 [3] NCCL INFO Connected all trees +gpub022:3643872:3644000 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:3643872:3644000 [3] NCCL INFO 2 coll 
channels, 2 p2p channels, 2 p2p channels per peer +gpub022:3643872:3644000 [3] NCCL INFO comm 0x503e4a90 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:1853281:1853281 [3] NCCL INFO cudaDriverVersion 12010 +gpub053:1853281:1853281 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1853281:1853281 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1853281:1853409 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1853281:1853409 [3] NCCL INFO Using network IB +gpub053:1853281:1853409 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub053:1853281:1853409 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub053:1853281:1853409 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub053:1853281:1853409 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub053:1853281:1853409 [3] NCCL INFO Connected all rings +gpub053:1853281:1853409 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub053:1853281:1853409 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub053:1853281:1853409 [3] NCCL INFO Connected all trees +gpub053:1853281:1853409 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1853281:1853409 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1853281:1853409 [3] NCCL INFO comm 0x9dd1ec40 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:3674023:3674023 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:3674023:3674023 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3674023:3674023 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3674023:3674154 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3674023:3674154 [1] NCCL INFO Using network IB +gpub074:3674023:3674154 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:3674023:3674154 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub074:3674023:3674154 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:3674023:3674154 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:3674023:3674154 [1] NCCL INFO Connected all rings +gpub074:3674023:3674154 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub074:3674023:3674154 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub074:3674023:3674154 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:3674023:3674154 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:3674023:3674154 [1] NCCL INFO Connected all trees +gpub074:3674023:3674154 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3674023:3674154 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3674023:3674154 [1] NCCL INFO comm 0x8b2ea8e0 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub074:3674025:3674025 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:3674025:3674025 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3674025:3674025 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3674025:3674155 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3674025:3674155 [3] NCCL INFO Using network IB +gpub074:3674025:3674155 [3] NCCL INFO Setting affinity for GPU 3 to 
ffff +gpub074:3674025:3674155 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub074:3674025:3674155 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:3674025:3674155 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:3674025:3674155 [3] NCCL INFO Connected all rings +gpub074:3674025:3674155 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:3674025:3674155 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:3674025:3674155 [3] NCCL INFO Connected all trees +gpub074:3674025:3674155 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3674025:3674155 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3674025:3674155 [3] NCCL INFO comm 0x8bfc7020 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:3674022:3674022 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:3674022:3674022 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3674022:3674022 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3674022:3674152 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3674022:3674152 [0] NCCL INFO Using network IB +gpub074:3674022:3674152 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:3674022:3674152 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:3674022:3674152 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:3674022:3674152 [0] NCCL INFO Connected all rings +gpub074:3674022:3674152 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Connected all trees +gpub074:3674022:3674152 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3674022:3674152 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3674022:3674152 [0] NCCL INFO comm 0x50173c20 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub074:3674024:3674024 [2] NCCL INFO cudaDriverVersion 12010 +gpub074:3674024:3674024 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3674024:3674024 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3674024:3674153 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3674024:3674153 [2] NCCL INFO Using network IB +gpub074:3674024:3674153 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub074:3674024:3674153 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub074:3674024:3674153 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:3674024:3674153 [2] NCCL INFO Channel 01/0 : 
50[85000] -> 51[c7000] via P2P/IPC +gpub074:3674024:3674153 [2] NCCL INFO Connected all rings +gpub074:3674024:3674153 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub074:3674024:3674153 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub074:3674024:3674153 [2] NCCL INFO Connected all trees +gpub074:3674024:3674153 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3674024:3674153 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3674024:3674153 [2] NCCL INFO comm 0x4fe75600 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub075:1498545:1498545 [2] NCCL INFO cudaDriverVersion 12010 +gpub075:1498545:1498545 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:1498545:1498545 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:1498545:1498671 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:1498545:1498671 [2] NCCL INFO Using network IB +gpub075:1498545:1498671 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub075:1498545:1498671 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub075:1498545:1498671 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub075:1498545:1498671 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub075:1498545:1498671 [2] NCCL INFO Connected all rings +gpub075:1498545:1498671 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub075:1498545:1498671 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub075:1498545:1498671 [2] NCCL INFO Connected all trees +gpub075:1498545:1498671 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub075:1498545:1498671 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:1498545:1498671 [2] NCCL INFO comm 0x500cd5c0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:204478:204478 [3] NCCL INFO cudaDriverVersion 12010 +gpub078:204478:204478 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:204478:204478 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:204478:204627 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:204478:204627 [3] NCCL INFO Using network IB +gpub078:204478:204627 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub078:204478:204627 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub078:204478:204627 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:204478:204627 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:204478:204627 [3] NCCL INFO Connected all rings +gpub078:204478:204627 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:204478:204627 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:204478:204627 [3] NCCL INFO Connected all trees +gpub078:204478:204627 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:204478:204627 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:204478:204627 [3] NCCL INFO comm 0x4fdeca90 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub078:204477:204477 [2] NCCL INFO cudaDriverVersion 12010 +gpub078:204477:204477 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:204477:204477 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation 
+gpub078:204477:204628 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:204477:204628 [2] NCCL INFO Using network IB +gpub078:204477:204628 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub078:204477:204628 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub078:204477:204628 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:204477:204628 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:204477:204628 [2] NCCL INFO Connected all rings +gpub078:204477:204628 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:204477:204628 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:204477:204628 [2] NCCL INFO Connected all trees +gpub078:204477:204628 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:204477:204628 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:204477:204628 [2] NCCL INFO comm 0x950be50 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub021:390566:390566 [3] NCCL INFO cudaDriverVersion 12010 +gpub021:390566:390566 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.121<0> +gpub021:390566:390566 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub021:390566:390693 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.121<0> +gpub021:390566:390693 [3] NCCL INFO Using network IB +gpub021:390566:390693 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub021:390566:390693 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub021:390566:390693 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub021:390566:390693 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub021:390566:390693 [3] NCCL INFO Connected all rings +gpub021:390566:390693 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub021:390566:390693 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub021:390566:390693 [3] NCCL INFO Connected all trees +gpub021:390566:390693 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub021:390566:390693 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub021:390566:390693 [3] NCCL INFO comm 0x51469130 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub054:3656063:3656063 [1] NCCL INFO cudaDriverVersion 12010 +gpub054:3656063:3656063 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3656063:3656063 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3656063:3656192 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3656063:3656192 [1] NCCL INFO Using network IB +gpub054:3656063:3656192 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub054:3656063:3656192 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub054:3656063:3656192 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub054:3656063:3656192 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub054:3656063:3656192 [1] NCCL INFO Connected all rings +gpub054:3656063:3656192 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub054:3656063:3656192 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub054:3656063:3656192 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub054:3656063:3656192 [1] NCCL INFO Channel 01/0 : 37[46000] -> 
36[7000] via P2P/IPC +gpub054:3656063:3656192 [1] NCCL INFO Connected all trees +gpub054:3656063:3656192 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:3656063:3656192 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3656063:3656192 [1] NCCL INFO comm 0x50da24d0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub021:390563:390563 [0] NCCL INFO cudaDriverVersion 12010 +gpub021:390563:390563 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.121<0> +gpub021:390563:390563 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub021:390563:390695 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.121<0> +gpub021:390563:390695 [0] NCCL INFO Using network IB +gpub021:390563:390695 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub021:390563:390695 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub021:390563:390695 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub021:390563:390695 [0] NCCL INFO Connected all rings +gpub054:3656064:3656064 [2] NCCL INFO cudaDriverVersion 12010 +gpub054:3656064:3656064 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3656064:3656064 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3656064:3656191 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3656064:3656191 [2] NCCL INFO Using network IB +gpub054:3656064:3656191 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub054:3656064:3656191 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub054:3656064:3656191 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub054:3656064:3656191 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub054:3656064:3656191 [2] NCCL INFO Connected all rings +gpub054:3656064:3656191 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub054:3656064:3656191 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub073:545753:545753 [3] NCCL INFO cudaDriverVersion 12010 +gpub073:545753:545753 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:545753:545753 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:545753:545878 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:545753:545878 [3] NCCL INFO Using network IB +gpub073:545753:545878 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub073:545753:545878 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub073:545753:545878 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub073:545753:545878 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub073:545753:545878 [3] NCCL INFO Connected all rings +gpub073:545753:545878 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub073:545753:545878 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub021:390563:390695 [0] 
NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Connected all trees +gpub021:390563:390695 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub021:390563:390695 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub021:390563:390695 [0] NCCL INFO comm 0x51605390 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub054:3656064:3656191 [2] NCCL INFO Connected all trees +gpub054:3656064:3656191 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:3656064:3656191 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3656064:3656191 [2] NCCL INFO comm 0x8a8d4950 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub073:545753:545878 [3] NCCL INFO Connected all trees +gpub073:545753:545878 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:545753:545878 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:545753:545878 [3] NCCL INFO comm 0x50602840 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub021:390564:390564 [1] NCCL INFO cudaDriverVersion 12010 +gpub021:390564:390564 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.121<0> +gpub021:390564:390564 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub021:390564:390694 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.121<0> +gpub021:390564:390694 [1] NCCL INFO Using network IB +gpub021:390564:390694 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub021:390564:390694 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub021:390564:390694 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub021:390564:390694 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub021:390564:390694 [1] NCCL INFO Connected all rings +gpub021:390564:390694 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub021:390564:390694 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub054:3656065:3656065 [3] NCCL INFO cudaDriverVersion 12010 +gpub054:3656065:3656065 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3656065:3656065 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3656065:3656193 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3656065:3656193 [3] NCCL INFO Using network IB +gpub054:3656065:3656193 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub054:3656065:3656193 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub054:3656065:3656193 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub054:3656065:3656193 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub054:3656065:3656193 [3] NCCL INFO Connected all rings +gpub054:3656065:3656193 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub054:3656065:3656193 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub021:390564:390694 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub021:390564:390694 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC 
+gpub021:390564:390694 [1] NCCL INFO Connected all trees +gpub021:390564:390694 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub021:390564:390694 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub021:390564:390694 [1] NCCL INFO comm 0x9e6cc8d0 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub054:3656065:3656193 [3] NCCL INFO Connected all trees +gpub054:3656065:3656193 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:3656065:3656193 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3656065:3656193 [3] NCCL INFO comm 0xb59a2f90 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub037:1697458:1697458 [2] NCCL INFO cudaDriverVersion 12010 +gpub037:1697458:1697458 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1697458:1697458 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1697458:1697595 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1697458:1697595 [2] NCCL INFO Using network IB +gpub037:1697458:1697595 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub037:1697458:1697595 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub037:1697458:1697595 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub037:1697458:1697595 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub037:1697458:1697595 [2] NCCL INFO Connected all rings +gpub037:1697458:1697595 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub037:1697458:1697595 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub037:1697458:1697595 [2] NCCL INFO Connected all trees +gpub037:1697458:1697595 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub037:1697458:1697595 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:1697458:1697595 [2] NCCL INFO comm 0x505de110 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub037:1697456:1697456 [0] NCCL INFO cudaDriverVersion 12010 +gpub037:1697456:1697456 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1697456:1697456 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1697456:1697592 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1697456:1697592 [0] NCCL INFO Using network IB +gpub037:1697456:1697592 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub037:1697456:1697592 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub037:1697456:1697592 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub037:1697456:1697592 [0] NCCL INFO Connected all rings +gpub024:181797:181797 [3] NCCL INFO cudaDriverVersion 12010 +gpub024:181797:181797 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:181797:181797 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:181797:181928 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:181797:181928 [3] NCCL INFO Using network IB +gpub024:181797:181928 [3] NCCL INFO Setting affinity for GPU 3 to ffff 
+gpub024:181797:181928 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub024:181797:181928 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub024:181797:181928 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub024:181797:181928 [3] NCCL INFO Connected all rings +gpub024:181797:181928 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub024:181797:181928 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Connected all trees +gpub037:1697456:1697592 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub037:1697456:1697592 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:1697456:1697592 [0] NCCL INFO comm 0x51a3c2e0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub024:181797:181928 [3] NCCL INFO Connected all trees +gpub024:181797:181928 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:181797:181928 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:181797:181928 [3] NCCL INFO comm 0x507c77a0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub050:2358256:2358256 [2] NCCL INFO cudaDriverVersion 12010 +gpub050:2358256:2358256 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2358256:2358256 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2358256:2358382 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2358256:2358382 [2] NCCL INFO Using network IB +gpub050:2358256:2358382 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub050:2358256:2358382 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub050:2358256:2358382 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub050:2358256:2358382 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub050:2358256:2358382 [2] NCCL INFO Connected all rings +gpub050:2358256:2358382 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub050:2358256:2358382 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub007:2197525:2197525 [2] NCCL INFO cudaDriverVersion 12010 +gpub007:2197525:2197525 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:2197525:2197525 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:2197525:2197660 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.107<0> +gpub007:2197525:2197660 [2] NCCL INFO Using network IB +gpub007:2197525:2197660 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub007:2197525:2197660 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub007:2197525:2197660 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub007:2197525:2197660 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub007:2197525:2197660 
[2] NCCL INFO Connected all rings +gpub007:2197525:2197660 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub007:2197525:2197660 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub050:2358256:2358382 [2] NCCL INFO Connected all trees +gpub050:2358256:2358382 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2358256:2358382 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2358256:2358382 [2] NCCL INFO comm 0x5136dc80 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub007:2197525:2197660 [2] NCCL INFO Connected all trees +gpub007:2197525:2197660 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:2197525:2197660 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:2197525:2197660 [2] NCCL INFO comm 0xb6001d10 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub024:181794:181794 [0] NCCL INFO cudaDriverVersion 12010 +gpub024:181794:181794 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:181794:181794 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:181794:181927 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:181794:181927 [0] NCCL INFO Using network IB +gpub024:181794:181927 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub024:181794:181927 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub024:181794:181927 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub024:181794:181927 [0] NCCL INFO Connected all rings +gpub024:181794:181927 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Connected all trees +gpub024:181794:181927 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:181794:181927 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:181794:181927 [0] NCCL INFO comm 0xa37e3f90 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub007:2197524:2197524 [1] NCCL INFO cudaDriverVersion 12010 +gpub007:2197524:2197524 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:2197524:2197524 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:2197524:2197661 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.107<0> +gpub007:2197524:2197661 [1] NCCL INFO Using network IB +gpub007:2197524:2197661 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub007:2197524:2197661 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub007:2197524:2197661 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub007:2197524:2197661 [1] NCCL INFO Channel 01/0 : 1[46000] 
-> 2[85000] via P2P/IPC +gpub007:2197524:2197661 [1] NCCL INFO Connected all rings +gpub007:2197524:2197661 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub007:2197524:2197661 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub007:2197524:2197661 [1] NCCL INFO Connected all trees +gpub007:2197524:2197661 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:2197524:2197661 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:2197524:2197661 [1] NCCL INFO comm 0x8cce800 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub050:2358255:2358255 [1] NCCL INFO cudaDriverVersion 12010 +gpub050:2358255:2358255 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2358255:2358255 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2358255:2358384 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2358255:2358384 [1] NCCL INFO Using network IB +gpub050:2358255:2358384 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub050:2358255:2358384 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub050:2358255:2358384 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub050:2358255:2358384 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub050:2358255:2358384 [1] NCCL INFO Connected all rings +gpub050:2358255:2358384 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpub050:2358255:2358384 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub050:2358255:2358384 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub050:2358255:2358384 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub050:2358255:2358384 [1] NCCL INFO Connected all trees +gpub050:2358255:2358384 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2358255:2358384 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2358255:2358384 [1] NCCL INFO comm 0xb33f9a00 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub050:2358254:2358254 [0] NCCL INFO cudaDriverVersion 12010 +gpub050:2358254:2358254 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2358254:2358254 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2358254:2358383 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2358254:2358383 [0] NCCL INFO Using network IB +gpub050:2358254:2358383 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub050:2358254:2358383 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub050:2358254:2358383 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub050:2358254:2358383 [0] NCCL INFO Connected all rings +gpub050:2358254:2358383 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via 
NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Connected all trees +gpub050:2358254:2358383 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2358254:2358383 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2358254:2358383 [0] NCCL INFO comm 0xab879950 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub073:545751:545751 [1] NCCL INFO cudaDriverVersion 12010 +gpub073:545751:545751 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:545751:545751 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:545751:545879 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:545751:545879 [1] NCCL INFO Using network IB +gpub073:545751:545879 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub073:545751:545879 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub073:545751:545879 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Connected all rings +gpub073:545751:545879 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub073:545751:545879 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub052:2095168:2095168 [2] NCCL INFO cudaDriverVersion 12010 +gpub052:2095168:2095168 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2095168:2095168 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2095168:2095300 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2095168:2095300 [2] NCCL INFO Using network IB +gpub052:2095168:2095300 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub052:2095168:2095300 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpub052:2095168:2095300 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub052:2095168:2095300 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub052:2095168:2095300 [2] NCCL INFO Connected all rings +gpub052:2095168:2095300 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub052:2095168:2095300 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Connected all trees +gpub073:545751:545879 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:545751:545879 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:545751:545879 [1] NCCL INFO comm 0x5091fcf0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub052:2095168:2095300 [2] NCCL INFO Connected all trees +gpub052:2095168:2095300 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2095168:2095300 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2095168:2095300 [2] NCCL INFO comm 0x9dba4a0 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub024:181796:181796 [2] NCCL INFO cudaDriverVersion 12010 +gpub024:181796:181796 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> 
+gpub024:181796:181796 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:181796:181926 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:181796:181926 [2] NCCL INFO Using network IB +gpub024:181796:181926 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub024:181796:181926 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub024:181796:181926 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub024:181796:181926 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub024:181796:181926 [2] NCCL INFO Connected all rings +gpub024:181796:181926 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub024:181796:181926 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub024:181796:181926 [2] NCCL INFO Connected all trees +gpub052:2095166:2095166 [0] NCCL INFO cudaDriverVersion 12010 +gpub052:2095166:2095166 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2095166:2095166 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2095166:2095301 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2095166:2095301 [0] NCCL INFO Using network IB +gpub052:2095166:2095301 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub052:2095166:2095301 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub052:2095166:2095301 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub052:2095166:2095301 [0] NCCL INFO Connected all rings +gpub024:181796:181926 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:181796:181926 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:181796:181926 [2] NCCL INFO comm 0x9361eb0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub021:390565:390565 [2] NCCL INFO cudaDriverVersion 12010 +gpub021:390565:390565 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.121<0> +gpub021:390565:390565 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub021:390565:390696 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.121<0> +gpub021:390565:390696 [2] NCCL INFO Using network IB +gpub021:390565:390696 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub021:390565:390696 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub021:390565:390696 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub021:390565:390696 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub021:390565:390696 [2] NCCL INFO Connected all rings +gpub021:390565:390696 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub021:390565:390696 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub021:390565:390696 [2] NCCL INFO Connected all trees +gpub052:2095166:2095301 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO 
Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Connected all trees +gpub052:2095166:2095301 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2095166:2095301 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2095166:2095301 [0] NCCL INFO comm 0x98eb2450 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub021:390565:390696 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub021:390565:390696 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub021:390565:390696 [2] NCCL INFO comm 0x507a64a0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub054:3656062:3656062 [0] NCCL INFO cudaDriverVersion 12010 +gpub054:3656062:3656062 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3656062:3656062 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3656062:3656190 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3656062:3656190 [0] NCCL INFO Using network IB +gpub054:3656062:3656190 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub054:3656062:3656190 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub054:3656062:3656190 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub054:3656062:3656190 [0] NCCL INFO Connected all rings +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Connected all trees +gpub054:3656062:3656190 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:3656062:3656190 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3656062:3656190 [0] NCCL INFO comm 0x83d6380 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub007:2197526:2197526 [3] NCCL INFO cudaDriverVersion 12010 +gpub007:2197526:2197526 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:2197526:2197526 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:2197526:2197659 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.107<0> +gpub007:2197526:2197659 [3] NCCL INFO Using network IB +gpub007:2197526:2197659 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub007:2197526:2197659 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub007:2197526:2197659 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub007:2197526:2197659 [3] NCCL INFO Channel 
01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub007:2197526:2197659 [3] NCCL INFO Connected all rings +gpub007:2197526:2197659 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub007:2197526:2197659 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub007:2197526:2197659 [3] NCCL INFO Connected all trees +gpub007:2197526:2197659 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:2197526:2197659 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:2197526:2197659 [3] NCCL INFO comm 0xaeb3b350 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub037:1697459:1697459 [3] NCCL INFO cudaDriverVersion 12010 +gpub037:1697459:1697459 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1697459:1697459 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1697459:1697594 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1697459:1697594 [3] NCCL INFO Using network IB +gpub037:1697459:1697594 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub037:1697459:1697594 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub037:1697459:1697594 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub037:1697459:1697594 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub037:1697459:1697594 [3] NCCL INFO Connected all rings +gpub037:1697459:1697594 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub037:1697459:1697594 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub037:1697459:1697594 [3] NCCL INFO Connected all trees +gpub037:1697459:1697594 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub037:1697459:1697594 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:1697459:1697594 [3] NCCL INFO comm 0x4f67f390 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub007:2197523:2197658 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.107<0> +gpub007:2197523:2197658 [0] NCCL INFO Using network IB +gpub007:2197523:2197658 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub007:2197523:2197658 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub007:2197523:2197658 [0] NCCL INFO Connected all rings +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Connected all trees +gpub007:2197523:2197658 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:2197523:2197658 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 
p2p channels per peer +gpub007:2197523:2197658 [0] NCCL INFO comm 0x8cd7e00 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub073:545750:545750 [0] NCCL INFO cudaDriverVersion 12010 +gpub073:545750:545750 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:545750:545750 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:545750:545881 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:545750:545881 [0] NCCL INFO Using network IB +gpub073:545750:545881 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub073:545750:545881 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub073:545750:545881 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub073:545750:545881 [0] NCCL INFO Connected all rings +gpub073:545750:545881 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Connected all trees +gpub073:545750:545881 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:545750:545881 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:545750:545881 [0] NCCL INFO comm 0x8ee1c2d0 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub073:545752:545752 [2] NCCL INFO cudaDriverVersion 12010 +gpub073:545752:545752 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:545752:545752 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:545752:545880 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:545752:545880 [2] NCCL INFO Using network IB +gpub073:545752:545880 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub073:545752:545880 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub073:545752:545880 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub073:545752:545880 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub073:545752:545880 [2] NCCL INFO Connected all rings +gpub073:545752:545880 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub073:545752:545880 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub073:545752:545880 [2] NCCL INFO Connected all trees +gpub073:545752:545880 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:545752:545880 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:545752:545880 [2] NCCL INFO comm 0x9222770 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub050:2358257:2358257 [3] NCCL INFO cudaDriverVersion 12010 +gpub050:2358257:2358257 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2358257:2358257 [3] NCCL INFO NET/Plugin : No 
plugin found (libnccl-net.so), using internal implementation +gpub050:2358257:2358385 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2358257:2358385 [3] NCCL INFO Using network IB +gpub050:2358257:2358385 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub050:2358257:2358385 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub050:2358257:2358385 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub050:2358257:2358385 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub050:2358257:2358385 [3] NCCL INFO Connected all rings +gpub050:2358257:2358385 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub050:2358257:2358385 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub050:2358257:2358385 [3] NCCL INFO Connected all trees +gpub050:2358257:2358385 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2358257:2358385 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2358257:2358385 [3] NCCL INFO comm 0x1bc447d0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1609738:1609738 [2] NCCL INFO cudaDriverVersion 12010 +gpub066:1609738:1609738 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1609738:1609738 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1609738:1609861 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1609738:1609861 [2] NCCL INFO Using network IB +gpub066:1609738:1609861 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub066:1609738:1609861 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub066:1609738:1609861 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub066:1609738:1609861 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub066:1609738:1609861 [2] NCCL INFO Connected all rings +gpub066:1609738:1609861 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub066:1609738:1609861 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub066:1609738:1609861 [2] NCCL INFO Connected all trees +gpub066:1609738:1609861 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1609738:1609861 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1609738:1609861 [2] NCCL INFO comm 0x98236b90 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub066:1609739:1609739 [3] NCCL INFO cudaDriverVersion 12010 +gpub066:1609739:1609739 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1609739:1609739 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1609739:1609860 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1609739:1609860 [3] NCCL INFO Using network IB +gpub066:1609739:1609860 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub066:1609739:1609860 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub066:1609739:1609860 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1609739:1609860 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1609739:1609860 [3] NCCL INFO Connected all rings +gpub066:1609739:1609860 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub066:1609739:1609860 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub066:1609739:1609860 [3] NCCL 
INFO Connected all trees +gpub066:1609739:1609860 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1609739:1609860 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1609739:1609860 [3] NCCL INFO comm 0x50eb5f50 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1609737:1609737 [1] NCCL INFO cudaDriverVersion 12010 +gpub066:1609737:1609737 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1609737:1609737 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1609737:1609862 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1609737:1609862 [1] NCCL INFO Using network IB +gpub066:1609737:1609862 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub066:1609737:1609862 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub066:1609737:1609862 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub066:1609737:1609862 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub066:1609737:1609862 [1] NCCL INFO Connected all rings +gpub066:1609737:1609862 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub066:1609737:1609862 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub078:204475:204475 [0] NCCL INFO cudaDriverVersion 12010 +gpub078:204475:204475 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:204475:204475 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:204475:204626 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:204475:204626 [0] NCCL INFO Using network IB +gpub078:204475:204626 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub078:204475:204626 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:204475:204626 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:204475:204626 [0] NCCL INFO Connected all rings +gpub066:1609737:1609862 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub066:1609737:1609862 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub066:1609737:1609862 [1] NCCL INFO Connected all trees +gpub066:1609737:1609862 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1609737:1609862 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1609737:1609862 [1] NCCL INFO comm 0x8ebd08f0 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub078:204475:204626 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Connected all trees +gpub078:204475:204626 [0] NCCL INFO threadThresholds 8/8/64 | 
512/8/64 | 512 | 512 +gpub078:204475:204626 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:204475:204626 [0] NCCL INFO comm 0x506b5c80 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub078:204476:204476 [1] NCCL INFO cudaDriverVersion 12010 +gpub078:204476:204476 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:204476:204476 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:204476:204625 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:204476:204625 [1] NCCL INFO Using network IB +gpub078:204476:204625 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub078:204476:204625 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub078:204476:204625 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:204476:204625 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:204476:204625 [1] NCCL INFO Connected all rings +gpub078:204476:204625 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub078:204476:204625 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub078:204476:204625 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:204476:204625 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:204476:204625 [1] NCCL INFO Connected all trees +gpub078:204476:204625 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:204476:204625 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:204476:204625 [1] NCCL INFO comm 0xba083a40 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub066:1609736:1609736 [0] NCCL INFO cudaDriverVersion 12010 +gpub066:1609736:1609736 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1609736:1609736 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1609736:1609863 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1609736:1609863 [0] NCCL INFO Using network IB +gpub066:1609736:1609863 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub066:1609736:1609863 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub066:1609736:1609863 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub066:1609736:1609863 [0] NCCL INFO Connected all rings +gpub066:1609736:1609863 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Connected all trees +gpub066:1609736:1609863 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1609736:1609863 [0] NCCL INFO 2 coll channels, 2 p2p 
channels, 2 p2p channels per peer +gpub066:1609736:1609863 [0] NCCL INFO comm 0x8ce8640 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub091:2047659:2047659 [1] NCCL INFO cudaDriverVersion 12010 +gpub091:2047659:2047659 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:2047659:2047659 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:2047659:2047795 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:2047659:2047795 [1] NCCL INFO Using network IB +gpub091:2047659:2047795 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub091:2047659:2047795 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub091:2047659:2047795 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub091:2047659:2047795 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub091:2047659:2047795 [1] NCCL INFO Connected all rings +gpub091:2047659:2047795 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub091:2047659:2047795 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub091:2047659:2047795 [1] NCCL INFO Connected all trees +gpub091:2047659:2047795 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub091:2047659:2047795 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub091:2047659:2047795 [1] NCCL INFO comm 0x50f1f160 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub037:1697457:1697457 [1] NCCL INFO cudaDriverVersion 12010 +gpub037:1697457:1697457 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1697457:1697457 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1697457:1697593 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1697457:1697593 [1] NCCL INFO Using network IB +gpub037:1697457:1697593 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub037:1697457:1697593 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub037:1697457:1697593 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub037:1697457:1697593 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub037:1697457:1697593 [1] NCCL INFO Connected all rings +gpub037:1697457:1697593 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub037:1697457:1697593 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub037:1697457:1697593 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub037:1697457:1697593 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub037:1697457:1697593 [1] NCCL INFO Connected all trees +gpub037:1697457:1697593 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub037:1697457:1697593 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:1697457:1697593 [1] NCCL INFO comm 0x50ddb3c0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:181795:181795 [1] NCCL INFO cudaDriverVersion 12010 +gpub024:181795:181795 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:181795:181795 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:181795:181929 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:181795:181929 [1] NCCL INFO Using network IB +gpub024:181795:181929 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub024:181795:181929 
[1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub024:181795:181929 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub024:181795:181929 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub024:181795:181929 [1] NCCL INFO Connected all rings +gpub024:181795:181929 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub024:181795:181929 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub024:181795:181929 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub024:181795:181929 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub024:181795:181929 [1] NCCL INFO Connected all trees +gpub024:181795:181929 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:181795:181929 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:181795:181929 [1] NCCL INFO comm 0x9cf8ce0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub023:3256278:3256278 [3] NCCL INFO cudaDriverVersion 12010 +gpub023:3256278:3256278 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0> +gpub023:3256278:3256278 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub023:3256278:3256407 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.123<0> +gpub023:3256278:3256407 [3] NCCL INFO Using network IB +gpub023:3256278:3256407 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub023:3256278:3256407 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub023:3256278:3256407 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub023:3256278:3256407 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub023:3256278:3256407 [3] NCCL INFO Connected all rings +gpub023:3256278:3256407 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub023:3256278:3256407 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub023:3256278:3256407 [3] NCCL INFO Connected all trees +gpub023:3256278:3256407 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub023:3256278:3256407 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub023:3256278:3256407 [3] NCCL INFO comm 0x5130f2d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:2095169:2095169 [3] NCCL INFO cudaDriverVersion 12010 +gpub052:2095169:2095169 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2095169:2095169 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2095169:2095299 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2095169:2095299 [3] NCCL INFO Using network IB +gpub052:2095169:2095299 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub052:2095169:2095299 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub052:2095169:2095299 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub052:2095169:2095299 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub052:2095169:2095299 [3] NCCL INFO Connected all rings +gpub052:2095169:2095299 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub052:2095169:2095299 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub052:2095169:2095299 [3] NCCL INFO Connected all trees +gpub052:2095169:2095299 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2095169:2095299 [3] NCCL INFO 2 coll 
channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:2095169:2095299 [3] NCCL INFO comm 0x93a3e60 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub052:2095167:2095167 [1] NCCL INFO cudaDriverVersion 12010
+gpub052:2095167:2095167 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0>
+gpub052:2095167:2095167 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub052:2095167:2095302 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0>
+gpub052:2095167:2095302 [1] NCCL INFO Using network IB
+gpub052:2095167:2095302 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub052:2095167:2095302 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpub052:2095167:2095302 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub052:2095167:2095302 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub052:2095167:2095302 [1] NCCL INFO Connected all rings
+gpub052:2095167:2095302 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpub052:2095167:2095302 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpub091:2047658:2047658 [0] NCCL INFO cudaDriverVersion 12010
+gpub091:2047658:2047658 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0>
+gpub091:2047658:2047658 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub091:2047658:2047793 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0>
+gpub091:2047658:2047793 [0] NCCL INFO Using network IB
+gpub091:2047658:2047793 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub091:2047658:2047793 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpub091:2047658:2047793 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub091:2047658:2047793 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub091:2047658:2047793 [0] NCCL INFO Connected all rings
+gpub052:2095167:2095302 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub052:2095167:2095302 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub052:2095167:2095302 [1] NCCL INFO Connected all trees
+gpub052:2095167:2095302 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub052:2095167:2095302 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:2095167:2095302 [1] NCCL INFO comm 0x168bcad0 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub091:2047658:2047793 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Connected all trees
+gpub091:2047658:2047793 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub091:2047658:2047793 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub091:2047658:2047793 [0] NCCL INFO comm 0x50181450 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub023:3256276:3256276 [1] NCCL INFO cudaDriverVersion 12010
+gpub023:3256276:3256276 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0>
+gpub023:3256276:3256276 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub023:3256276:3256406 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.123<0>
+gpub023:3256276:3256406 [1] NCCL INFO Using network IB
+gpub023:3256276:3256406 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub023:3256276:3256406 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpub023:3256276:3256406 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub023:3256276:3256406 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub023:3256276:3256406 [1] NCCL INFO Connected all rings
+gpub023:3256276:3256406 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpub023:3256276:3256406 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpub023:3256276:3256406 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub023:3256276:3256406 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub023:3256276:3256406 [1] NCCL INFO Connected all trees
+gpub023:3256276:3256406 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub023:3256276:3256406 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub023:3256276:3256406 [1] NCCL INFO comm 0x8cb3a6d0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub091:2047661:2047661 [3] NCCL INFO cudaDriverVersion 12010
+gpub091:2047661:2047661 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0>
+gpub091:2047661:2047661 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub091:2047661:2047794 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0>
+gpub091:2047661:2047794 [3] NCCL INFO Using network IB
+gpub091:2047661:2047794 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub091:2047661:2047794 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpub091:2047661:2047794 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub091:2047661:2047794 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub091:2047661:2047794 [3] NCCL INFO Connected all rings
+gpub091:2047661:2047794 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub091:2047661:2047794 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub091:2047661:2047794 [3] NCCL INFO Connected all trees
+gpub091:2047661:2047794 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub091:2047661:2047794 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub091:2047661:2047794 [3] NCCL INFO comm 0x8cc3b130 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub023:3256277:3256277 [2] NCCL INFO cudaDriverVersion 12010
+gpub023:3256277:3256277 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0>
+gpub023:3256277:3256277 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub023:3256277:3256404 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.123<0>
+gpub023:3256277:3256404 [2] NCCL INFO Using network IB
+gpub023:3256277:3256404 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub023:3256277:3256404 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpub023:3256277:3256404 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub023:3256277:3256404 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub023:3256277:3256404 [2] NCCL INFO Connected all rings
+gpub023:3256277:3256404 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub023:3256277:3256404 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub023:3256277:3256404 [2] NCCL INFO Connected all trees
+gpub023:3256277:3256404 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub023:3256277:3256404 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub023:3256277:3256404 [2] NCCL INFO comm 0x4f2ef4b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub023:3256275:3256275 [0] NCCL INFO cudaDriverVersion 12010
+gpub023:3256275:3256275 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0>
+gpub023:3256275:3256275 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub023:3256275:3256405 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.123<0>
+gpub023:3256275:3256405 [0] NCCL INFO Using network IB
+gpub023:3256275:3256405 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub023:3256275:3256405 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpub023:3256275:3256405 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub023:3256275:3256405 [0] NCCL INFO Connected all rings
+gpub023:3256275:3256405 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Connected all trees
+gpub023:3256275:3256405 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub023:3256275:3256405 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub023:3256275:3256405 [0] NCCL INFO comm 0xa128c40 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub091:2047660:2047660 [2] NCCL INFO cudaDriverVersion 12010
+gpub091:2047660:2047660 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0>
+gpub091:2047660:2047660 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub091:2047660:2047792 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0>
+gpub091:2047660:2047792 [2] NCCL INFO Using network IB
+gpub091:2047660:2047792 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub091:2047660:2047792 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61
+gpub091:2047660:2047792 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub091:2047660:2047792 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub091:2047660:2047792 [2] NCCL INFO Connected all rings
+gpub091:2047660:2047792 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub091:2047660:2047792 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub091:2047660:2047792 [2] NCCL INFO Connected all trees
+gpub091:2047660:2047792 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub091:2047660:2047792 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub091:2047660:2047792 [2] NCCL INFO comm 0xbeb51a80 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+[gpub007:0/64] 2023-07-10 05:23:10,955 (trainer:732) INFO: 31epoch:train:1-100batch: iter_time=1.183, forward_time=0.245, loss_ctc=83.079, loss_att=57.526, acc=0.711, loss=65.192, backward_time=1.046, grad_norm=110.657, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.565e-05, train_time=9.671
+[gpub007:0/64] 2023-07-10 05:25:37,047 (trainer:732) INFO: 31epoch:train:101-200batch: iter_time=1.109e-04, forward_time=0.143, loss_ctc=79.221, loss_att=65.254, acc=0.698, loss=69.444, backward_time=1.041, grad_norm=126.389, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.564e-05, train_time=2.923
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
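
The reducer warning above is emitted by PyTorch's DDP once per worker process, which is why it appears many times in quick succession. The remedy it suggests lives in the DDP constructor. A minimal illustrative sketch (not the ESPnet trainer code; the linear model and the function name are stand-ins) of constructing DDP without the extra autograd-graph traversal:

    import torch
    from torch.nn.parallel import DistributedDataParallel as DDP

    def build_ddp_model(local_rank: int) -> DDP:
        # Assumes torch.distributed has already been initialized with the
        # NCCL backend, as in the "Init COMPLETE" lines above (one process
        # per GPU).
        torch.cuda.set_device(local_rank)
        model = torch.nn.Linear(80, 512).to(f"cuda:{local_rank}")  # stand-in model
        return DDP(
            model,
            device_ids=[local_rank],
            # The warning is triggered by find_unused_parameters=True. If
            # every parameter really is used in each forward pass, False
            # skips the extra traversal of the autograd graph per iteration.
            find_unused_parameters=False,
        )
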
+[gpub007:0/64] 2023-07-10 05:28:01,147 (trainer:732) INFO: 31epoch:train:201-300batch: iter_time=1.051e-04, forward_time=0.143, loss_ctc=74.048, loss_att=56.198, acc=0.684, loss=61.553, backward_time=1.037, grad_norm=124.834, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.562e-05, train_time=2.882
+[gpub007:0/64] 2023-07-10 05:30:23,878 (trainer:732) INFO: 31epoch:train:301-400batch: iter_time=1.221e-04, forward_time=0.142, loss_ctc=68.222, loss_att=55.104, acc=0.713, loss=59.039, backward_time=1.037, grad_norm=98.763, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.179, optim0_lr0=6.561e-05, train_time=2.854
+[gpub007:0/64] 2023-07-10 05:32:49,468 (trainer:732) INFO: 31epoch:train:401-500batch: iter_time=1.193e-04, forward_time=0.143, loss_ctc=72.589, loss_att=59.138, acc=0.708, loss=63.173, backward_time=1.037, grad_norm=105.545, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.560e-05, train_time=2.912
+[gpub007:0/64] 2023-07-10 05:35:12,590 (trainer:732) INFO: 31epoch:train:501-600batch: iter_time=1.200e-04, forward_time=0.142, loss_ctc=78.429, loss_att=57.192, acc=0.714, loss=63.563, backward_time=1.029, grad_norm=115.940, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.559e-05, train_time=2.862
+[gpub007:0/64] 2023-07-10 05:37:33,602 (trainer:732) INFO: 31epoch:train:601-700batch: iter_time=1.086e-04, forward_time=0.142, loss_ctc=73.557, loss_att=60.681, acc=0.695, loss=64.544, backward_time=1.039, grad_norm=98.771, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.558e-05, train_time=2.820
+[gpub007:0/64] 2023-07-10 05:39:51,847 (trainer:732) INFO: 31epoch:train:701-800batch: iter_time=1.078e-04, forward_time=0.143, loss_ctc=78.927, loss_att=60.176, acc=0.707, loss=65.802, backward_time=1.027, grad_norm=101.784, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.557e-05, train_time=2.765
+[gpub007:0/64] 2023-07-10 05:41:04,583 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
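
A quick arithmetic check on the trainer lines above: each reported loss equals an interpolation of loss_ctc and loss_att with a CTC weight of 0.3. The weight is inferred from the logged values themselves, not read from the training config, as the short snippet below verifies.

    # Verify the hybrid CTC/attention interpolation against the 201-300batch line.
    ctc_weight = 0.3  # inferred from the logged numbers; an assumption, not the config value
    loss_ctc, loss_att = 74.048, 56.198
    loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
    print(f"{loss:.3f}")  # -> 61.553, matching the logged loss
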
+[gpub007:0/64] 2023-07-10 05:41:22,351 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 05:41:25,696 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 05:41:25,696 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-10 05:41:25,818 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 05:46:54,397 (trainer:732) INFO: 31epoch:train:801-900batch: iter_time=2.610, forward_time=0.147, loss_ctc=84.353, loss_att=59.212, acc=0.713, loss=66.754, backward_time=1.050, grad_norm=113.914, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.179, optim0_lr0=6.556e-05, train_time=8.451
+[gpub007:0/64] 2023-07-10 05:49:10,529 (trainer:732) INFO: 31epoch:train:901-1000batch: iter_time=1.215e-04, forward_time=0.144, loss_ctc=80.485, loss_att=64.454, acc=0.685, loss=69.263, backward_time=1.026, grad_norm=113.670, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.555e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 05:51:25,984 (trainer:732) INFO: 31epoch:train:1001-1100batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=73.551, loss_att=55.639, acc=0.676, loss=61.013, backward_time=1.024, grad_norm=112.044, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.553e-05, train_time=2.709
+[gpub007:0/64] 2023-07-10 05:53:41,424 (trainer:732) INFO: 31epoch:train:1101-1200batch: iter_time=1.319e-04, forward_time=0.146, loss_ctc=72.723, loss_att=58.415, acc=0.710, loss=62.707, backward_time=1.025, grad_norm=111.492, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.181, optim0_lr0=6.552e-05, train_time=2.709
+[gpub007:0/64] 2023-07-10 05:55:57,268 (trainer:732) INFO: 31epoch:train:1201-1300batch: iter_time=1.616e-04, forward_time=0.147, loss_ctc=66.671, loss_att=53.081, acc=0.717, loss=57.158, backward_time=1.027, grad_norm=98.750, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.181, optim0_lr0=6.551e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 05:58:12,720 (trainer:732) INFO: 31epoch:train:1301-1400batch: iter_time=1.501e-04, forward_time=0.147, loss_ctc=76.363, loss_att=59.188, acc=0.694, loss=64.341, backward_time=1.025, grad_norm=114.733, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.181, optim0_lr0=6.550e-05, train_time=2.709
+[gpub007:0/64] 2023-07-10 06:00:28,243 (trainer:732) INFO: 31epoch:train:1401-1500batch: iter_time=1.642e-04, forward_time=0.148, loss_ctc=75.285, loss_att=58.303, acc=0.706, loss=63.398, backward_time=1.026, grad_norm=103.269, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.181, optim0_lr0=6.549e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 06:02:43,552 (trainer:732) INFO: 31epoch:train:1501-1600batch: iter_time=1.357e-04, forward_time=0.145, loss_ctc=74.621, loss_att=54.813, acc=0.698, loss=60.756, backward_time=1.022, grad_norm=114.863, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.548e-05, train_time=2.706
+[gpub007:0/64] 2023-07-10 06:04:14,468 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub007:0/64] 2023-07-10 06:04:32,666 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 06:04:36,091 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 06:04:36,091 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub007:0/64] 2023-07-10 06:04:36,097 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 06:10:05,276 (trainer:732) INFO: 31epoch:train:1601-1700batch: iter_time=1.268, forward_time=0.177, loss_ctc=83.861, loss_att=62.946, acc=0.716, loss=69.221, backward_time=1.036, grad_norm=111.788, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.547e-05, train_time=8.834
+[gpub007:0/64] 2023-07-10 06:12:21,570 (trainer:732) INFO: 31epoch:train:1701-1800batch: iter_time=1.262e-04, forward_time=0.144, loss_ctc=84.358, loss_att=61.690, acc=0.703, loss=68.491, backward_time=1.027, grad_norm=128.704, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.546e-05, train_time=2.726
+[gpub007:0/64] 2023-07-10 06:14:37,387 (trainer:732) INFO: 31epoch:train:1801-1900batch: iter_time=1.264e-04, forward_time=0.144, loss_ctc=77.029, loss_att=58.693, acc=0.688, loss=64.194, backward_time=1.022, grad_norm=100.689, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.179, optim0_lr0=6.544e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 06:16:53,035 (trainer:732) INFO: 31epoch:train:1901-2000batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=68.921, loss_att=52.419, acc=0.709, loss=57.370, backward_time=1.023, grad_norm=108.763, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.179, optim0_lr0=6.543e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 06:19:08,914 (trainer:732) INFO: 31epoch:train:2001-2100batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=68.731, loss_att=53.877, acc=0.724, loss=58.333, backward_time=1.025, grad_norm=116.687, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.542e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 06:21:24,934 (trainer:732) INFO: 31epoch:train:2101-2200batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=77.295, loss_att=63.073, acc=0.695, loss=67.340, backward_time=1.026, grad_norm=121.316, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.541e-05, train_time=2.720
+[gpub007:0/64] 2023-07-10 06:23:41,908 (trainer:732) INFO: 31epoch:train:2201-2300batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=76.729, loss_att=58.069, acc=0.719, loss=63.667, backward_time=1.028, grad_norm=96.608, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.540e-05, train_time=2.739
+[gpub007:0/64] 2023-07-10 06:26:08,069 (trainer:732) INFO: 31epoch:train:2301-2400batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=69.112, loss_att=51.060, acc=0.720, loss=56.476, backward_time=1.035, grad_norm=91.312, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.539e-05, train_time=2.923
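
The sampler summaries above (N-batch=37994, batch_size=128, mean=128.0, min=128, max=129) describe fixed-size, unsorted mini-batches built from one shard's key file. A rough sketch of that bookkeeping (an illustration of the logged behavior, not ESPnet's UnsortedBatchSampler implementation; the helper name is mine, and the key file is assumed to hold one utterance ID per line):

    import math

    def unsorted_batches(key_file: str, batch_size: int = 128) -> list[list[str]]:
        # Read utterance IDs in file order; no sorting by length.
        with open(key_file) as f:
            keys = [line.split()[0] for line in f if line.strip()]
        n_batch = math.ceil(len(keys) / batch_size)
        # Spread the keys as evenly as possible across n_batch batches, so
        # batch sizes differ by at most one, consistent with the min=128,
        # max=129 reported above.
        return [keys[i::n_batch] for i in range(n_batch)]
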
+[gpub007:0/64] 2023-07-10 06:28:26,644 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub007:0/64] 2023-07-10 06:28:44,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 06:28:48,049 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 06:28:48,049 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub007:0/64] 2023-07-10 06:28:48,056 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 06:32:15,761 (trainer:732) INFO: 31epoch:train:2401-2500batch: iter_time=1.333, forward_time=0.155, loss_ctc=82.287, loss_att=63.709, acc=0.701, loss=69.282, backward_time=1.033, grad_norm=121.817, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.538e-05, train_time=7.352
+[gpub007:0/64] 2023-07-10 06:34:33,345 (trainer:732) INFO: 31epoch:train:2501-2600batch: iter_time=1.029e-04, forward_time=0.146, loss_ctc=83.004, loss_att=63.233, acc=0.707, loss=69.164, backward_time=1.032, grad_norm=133.547, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.537e-05, train_time=2.753
+[gpub007:0/64] 2023-07-10 06:36:49,325 (trainer:732) INFO: 31epoch:train:2601-2700batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=75.378, loss_att=57.609, acc=0.688, loss=62.940, backward_time=1.026, grad_norm=107.810, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.535e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 06:39:04,998 (trainer:732) INFO: 31epoch:train:2701-2800batch: iter_time=9.780e-05, forward_time=0.145, loss_ctc=69.226, loss_att=52.124, acc=0.709, loss=57.255, backward_time=1.024, grad_norm=97.581, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.534e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 06:41:21,063 (trainer:732) INFO: 31epoch:train:2801-2900batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=68.771, loss_att=53.569, acc=0.723, loss=58.130, backward_time=1.026, grad_norm=95.572, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.533e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 06:43:36,896 (trainer:732) INFO: 31epoch:train:2901-3000batch: iter_time=1.308e-04, forward_time=0.145, loss_ctc=77.273, loss_att=63.630, acc=0.695, loss=67.723, backward_time=1.025, grad_norm=111.271, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.532e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 06:45:52,595 (trainer:732) INFO: 31epoch:train:3001-3100batch: iter_time=1.373e-04, forward_time=0.145, loss_ctc=74.247, loss_att=57.709, acc=0.714, loss=62.671, backward_time=1.025, grad_norm=107.453, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.531e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 06:48:08,299 (trainer:732) INFO: 31epoch:train:3101-3200batch: iter_time=1.367e-04, forward_time=0.146, loss_ctc=69.916, loss_att=51.295, acc=0.719, loss=56.881, backward_time=1.025, grad_norm=117.528, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.530e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 06:50:25,122 (trainer:732) INFO: 31epoch:train:3201-3300batch: iter_time=1.503e-04, forward_time=0.146, loss_ctc=81.122, loss_att=63.230, acc=0.700, loss=68.598, backward_time=1.025, grad_norm=121.151, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.529e-05, train_time=2.736
+[gpub007:0/64] 2023-07-10 06:51:12,584 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub007:0/64] 2023-07-10 06:51:30,720 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 06:51:34,138 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 06:51:34,139 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub007:0/64] 2023-07-10 06:51:34,145 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 06:57:49,606 (trainer:732) INFO: 31epoch:train:3301-3400batch: iter_time=1.315, forward_time=0.145, loss_ctc=79.129, loss_att=54.895, acc=0.716, loss=62.165, backward_time=1.040, grad_norm=112.164, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.528e-05, train_time=8.889
+[gpub007:0/64] 2023-07-10 07:00:05,807 (trainer:732) INFO: 31epoch:train:3401-3500batch: iter_time=1.349e-04, forward_time=0.146, loss_ctc=78.453, loss_att=63.141, acc=0.690, loss=67.735, backward_time=1.027, grad_norm=113.408, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.527e-05, train_time=2.724
+[gpub007:0/64] 2023-07-10 07:02:22,047 (trainer:732) INFO: 31epoch:train:3501-3600batch: iter_time=1.149e-04, forward_time=0.145, loss_ctc=71.009, loss_att=53.688, acc=0.682, loss=58.884, backward_time=1.026, grad_norm=125.002, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.525e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 07:04:37,666 (trainer:732) INFO: 31epoch:train:3601-3700batch: iter_time=1.182e-04, forward_time=0.144, loss_ctc=71.425, loss_att=57.283, acc=0.713, loss=61.525, backward_time=1.025, grad_norm=109.352, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.524e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 07:06:53,025 (trainer:732) INFO: 31epoch:train:3701-3800batch: iter_time=1.310e-04, forward_time=0.144, loss_ctc=66.441, loss_att=52.903, acc=0.720, loss=56.964, backward_time=1.022, grad_norm=104.171, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.523e-05, train_time=2.707
+[gpub007:0/64] 2023-07-10 07:09:08,320 (trainer:732) INFO: 31epoch:train:3801-3900batch: iter_time=1.332e-04, forward_time=0.143, loss_ctc=75.756, loss_att=59.384, acc=0.694, loss=64.296, backward_time=1.022, grad_norm=121.042, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.522e-05, train_time=2.706
+[gpub007:0/64] 2023-07-10 07:11:24,379 (trainer:732) INFO: 31epoch:train:3901-4000batch: iter_time=1.335e-04, forward_time=0.145, loss_ctc=73.930, loss_att=57.143, acc=0.710, loss=62.179, backward_time=1.024, grad_norm=126.698, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.521e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 07:13:44,981 (trainer:732) INFO: 31epoch:train:4001-4100batch: iter_time=1.409e-04, forward_time=0.145, loss_ctc=70.275, loss_att=52.938, acc=0.709, loss=58.139, backward_time=1.033, grad_norm=126.609, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.520e-05, train_time=2.812
+[gpub007:0/64] 2023-07-10 07:15:16,971 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub007:0/64] 2023-07-10 07:15:34,896 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 07:15:38,324 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 07:15:38,324 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub007:0/64] 2023-07-10 07:15:38,331 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 07:19:40,883 (trainer:732) INFO: 31epoch:train:4101-4200batch: iter_time=1.314, forward_time=0.189, loss_ctc=82.970, loss_att=61.416, acc=0.719, loss=67.882, backward_time=1.037, grad_norm=108.322, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.181, optim0_lr0=6.519e-05, train_time=7.118
+[gpub007:0/64] 2023-07-10 07:21:57,691 (trainer:732) INFO: 31epoch:train:4201-4300batch: iter_time=1.415e-04, forward_time=0.147, loss_ctc=83.531, loss_att=60.252, acc=0.708, loss=67.236, backward_time=1.028, grad_norm=125.113, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.518e-05, train_time=2.736
+[gpub007:0/64] 2023-07-10 07:24:16,169 (trainer:732) INFO: 31epoch:train:4301-4400batch: iter_time=1.291e-04, forward_time=0.146, loss_ctc=75.635, loss_att=58.019, acc=0.697, loss=63.304, backward_time=1.029, grad_norm=123.408, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.517e-05, train_time=2.769
+[gpub007:0/64] 2023-07-10 07:26:35,350 (trainer:732) INFO: 31epoch:train:4401-4500batch: iter_time=1.082e-04, forward_time=0.146, loss_ctc=67.863, loss_att=51.370, acc=0.716, loss=56.318, backward_time=1.030, grad_norm=91.973, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.515e-05, train_time=2.783
+[gpub007:0/64] 2023-07-10 07:28:58,182 (trainer:732) INFO: 31epoch:train:4501-4600batch: iter_time=1.202e-04, forward_time=0.145, loss_ctc=67.304, loss_att=53.372, acc=0.726, loss=57.551, backward_time=1.040, grad_norm=97.524, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.514e-05, train_time=2.856
+[gpub007:0/64] 2023-07-10 07:31:14,230 (trainer:732) INFO: 31epoch:train:4601-4700batch: iter_time=1.212e-04, forward_time=0.146, loss_ctc=74.767, loss_att=61.094, acc=0.703, loss=65.196, backward_time=1.028, grad_norm=120.756, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.513e-05, train_time=2.721
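
The per-100-batch trainer lines follow a fixed key=value layout, which makes them easy to scrape for monitoring or plotting. A small illustrative helper (the regex and the function name are mine; the field names are exactly those printed above):

    import re

    KV_RE = re.compile(r"(\w+)=([0-9.e+-]+)")

    def parse_trainer_line(line: str) -> dict[str, float]:
        # Pull key=value pairs such as loss_ctc=74.767 or iter_time=1.212e-04
        # out of a "(trainer:732) INFO:" line like the ones above.
        return {key: float(value) for key, value in KV_RE.findall(line)}

    stats = parse_trainer_line("loss_ctc=74.767, loss_att=61.094, acc=0.703, loss=65.196, train_time=2.721")
    print(stats["loss"], stats["acc"])  # 65.196 0.703
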
forward_time=0.146, loss_ctc=74.767, loss_att=61.094, acc=0.703, loss=65.196, backward_time=1.028, grad_norm=120.756, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.513e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 07:33:33,530 (trainer:732) INFO: 31epoch:train:4701-4800batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=75.438, loss_att=57.519, acc=0.721, loss=62.895, backward_time=1.031, grad_norm=95.142, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.512e-05, train_time=2.786 +[gpub007:0/64] 2023-07-10 07:35:55,596 (trainer:732) INFO: 31epoch:train:4801-4900batch: iter_time=1.418e-04, forward_time=0.145, loss_ctc=68.786, loss_att=51.083, acc=0.717, loss=56.394, backward_time=1.030, grad_norm=92.529, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.511e-05, train_time=2.841 +[gpub007:0/64] 2023-07-10 07:38:20,546 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-10 07:38:38,979 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 07:38:42,415 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 07:38:42,415 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-10 07:38:42,421 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 07:41:52,558 (trainer:732) INFO: 31epoch:train:4901-5000batch: iter_time=2.120, forward_time=0.145, loss_ctc=81.459, loss_att=62.737, acc=0.703, loss=68.354, backward_time=1.031, grad_norm=105.027, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.510e-05, train_time=7.138 +[gpub007:0/64] 2023-07-10 07:44:10,065 (trainer:732) INFO: 31epoch:train:5001-5100batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=78.911, loss_att=55.235, acc=0.708, loss=62.338, backward_time=1.030, grad_norm=120.308, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.509e-05, train_time=2.751 +[gpub007:0/64] 2023-07-10 07:46:26,365 (trainer:732) INFO: 31epoch:train:5101-5200batch: iter_time=1.249e-04, forward_time=0.144, loss_ctc=77.632, loss_att=62.702, acc=0.705, loss=67.181, backward_time=1.027, grad_norm=108.082, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.508e-05, train_time=2.726 +[gpub007:0/64] 2023-07-10 07:48:41,519 (trainer:732) INFO: 31epoch:train:5201-5300batch: iter_time=1.358e-04, forward_time=0.144, loss_ctc=71.878, loss_att=53.872, acc=0.682, loss=59.274, backward_time=1.022, grad_norm=98.262, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.507e-05, train_time=2.703 +[gpub007:0/64] 2023-07-10 07:50:56,946 (trainer:732) INFO: 31epoch:train:5301-5400batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=65.458, loss_att=52.277, acc=0.719, loss=56.231, backward_time=1.022, grad_norm=89.313, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.506e-05, 
train_time=2.708 +[gpub007:0/64] 2023-07-10 07:53:12,319 (trainer:732) INFO: 31epoch:train:5401-5500batch: iter_time=1.247e-04, forward_time=0.144, loss_ctc=70.341, loss_att=56.945, acc=0.714, loss=60.964, backward_time=1.023, grad_norm=99.924, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.504e-05, train_time=2.707 +[gpub007:0/64] 2023-07-10 07:55:27,908 (trainer:732) INFO: 31epoch:train:5501-5600batch: iter_time=1.189e-04, forward_time=0.144, loss_ctc=75.973, loss_att=54.691, acc=0.715, loss=61.076, backward_time=1.024, grad_norm=113.552, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.503e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 07:57:43,611 (trainer:732) INFO: 31epoch:train:5601-5700batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=70.888, loss_att=58.277, acc=0.697, loss=62.060, backward_time=1.024, grad_norm=99.138, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.502e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 07:59:58,703 (trainer:732) INFO: 31epoch:train:5701-5800batch: iter_time=1.292e-04, forward_time=0.143, loss_ctc=78.522, loss_att=58.973, acc=0.707, loss=64.838, backward_time=1.022, grad_norm=105.973, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.501e-05, train_time=2.702 +[gpub007:0/64] 2023-07-10 08:00:43,914 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-10 08:01:01,700 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 08:01:05,118 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 08:01:05,118 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-10 08:01:05,124 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 08:06:45,413 (trainer:732) INFO: 31epoch:train:5801-5900batch: iter_time=1.267, forward_time=0.145, loss_ctc=82.584, loss_att=58.726, acc=0.720, loss=65.883, backward_time=1.045, grad_norm=115.873, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.500e-05, train_time=8.134 +[gpub007:0/64] 2023-07-10 08:09:01,769 (trainer:732) INFO: 31epoch:train:5901-6000batch: iter_time=1.203e-04, forward_time=0.143, loss_ctc=80.148, loss_att=61.257, acc=0.703, loss=66.925, backward_time=1.027, grad_norm=120.235, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.499e-05, train_time=2.727 +[gpub007:0/64] 2023-07-10 08:11:17,607 (trainer:732) INFO: 31epoch:train:6001-6100batch: iter_time=1.214e-04, forward_time=0.144, loss_ctc=72.256, loss_att=54.572, acc=0.693, loss=59.877, backward_time=1.025, grad_norm=124.845, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.498e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 08:13:33,227 (trainer:732) INFO: 31epoch:train:6101-6200batch: iter_time=1.243e-04, forward_time=0.145, loss_ctc=70.336, loss_att=55.625, acc=0.725, 
loss=60.038, backward_time=1.026, grad_norm=93.646, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.497e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 08:15:48,885 (trainer:732) INFO: 31epoch:train:6201-6300batch: iter_time=1.307e-04, forward_time=0.144, loss_ctc=65.446, loss_att=49.376, acc=0.730, loss=54.197, backward_time=1.026, grad_norm=98.134, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.496e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 08:18:04,660 (trainer:732) INFO: 31epoch:train:6301-6400batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=74.073, loss_att=57.696, acc=0.714, loss=62.609, backward_time=1.026, grad_norm=115.089, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.495e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 08:20:20,132 (trainer:732) INFO: 31epoch:train:6401-6500batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=74.540, loss_att=58.242, acc=0.710, loss=63.131, backward_time=1.024, grad_norm=105.523, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.493e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 08:22:35,251 (trainer:732) INFO: 31epoch:train:6501-6600batch: iter_time=1.212e-04, forward_time=0.144, loss_ctc=72.646, loss_att=54.097, acc=0.710, loss=59.662, backward_time=1.022, grad_norm=107.686, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.492e-05, train_time=2.702 +[gpub007:0/64] 2023-07-10 08:24:05,054 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-10 08:24:23,089 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 08:24:26,541 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 08:24:26,541 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-10 08:24:26,547 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 08:29:44,559 (trainer:732) INFO: 31epoch:train:6601-6700batch: iter_time=1.282, forward_time=0.210, loss_ctc=83.654, loss_att=62.545, acc=0.723, loss=68.878, backward_time=1.041, grad_norm=123.852, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.186, optim0_lr0=6.491e-05, train_time=8.586 +[gpub007:0/64] 2023-07-10 08:32:01,287 (trainer:732) INFO: 31epoch:train:6701-6800batch: iter_time=1.242e-04, forward_time=0.144, loss_ctc=83.130, loss_att=63.649, acc=0.692, loss=69.493, backward_time=1.027, grad_norm=148.253, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.490e-05, train_time=2.734 +[gpub007:0/64] 2023-07-10 08:34:16,947 (trainer:732) INFO: 31epoch:train:6801-6900batch: iter_time=1.192e-04, forward_time=0.143, loss_ctc=73.381, loss_att=55.522, acc=0.697, loss=60.880, backward_time=1.026, grad_norm=109.650, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.489e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 08:36:32,545 (trainer:732) 
INFO: 31epoch:train:6901-7000batch: iter_time=1.086e-04, forward_time=0.145, loss_ctc=66.644, loss_att=49.879, acc=0.710, loss=54.909, backward_time=1.025, grad_norm=106.383, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.488e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 08:38:48,247 (trainer:732) INFO: 31epoch:train:7001-7100batch: iter_time=1.357e-04, forward_time=0.144, loss_ctc=67.145, loss_att=53.493, acc=0.726, loss=57.588, backward_time=1.026, grad_norm=91.546, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.487e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 08:41:03,720 (trainer:732) INFO: 31epoch:train:7101-7200batch: iter_time=1.105e-04, forward_time=0.143, loss_ctc=75.932, loss_att=61.631, acc=0.690, loss=65.921, backward_time=1.024, grad_norm=130.659, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.486e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 08:43:19,555 (trainer:732) INFO: 31epoch:train:7201-7300batch: iter_time=1.296e-04, forward_time=0.143, loss_ctc=75.534, loss_att=57.506, acc=0.721, loss=62.914, backward_time=1.029, grad_norm=103.295, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.485e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 08:45:34,885 (trainer:732) INFO: 31epoch:train:7301-7400batch: iter_time=1.103e-04, forward_time=0.143, loss_ctc=68.888, loss_att=51.318, acc=0.711, loss=56.589, backward_time=1.025, grad_norm=100.928, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.484e-05, train_time=2.706 +[gpub007:0/64] 2023-07-10 08:48:00,624 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub007:0/64] 2023-07-10 08:48:18,954 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 08:48:22,400 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 08:48:22,401 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-10 08:48:22,407 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 08:51:10,030 (trainer:732) INFO: 31epoch:train:7401-7500batch: iter_time=1.625, forward_time=0.145, loss_ctc=80.592, loss_att=61.550, acc=0.707, loss=67.263, backward_time=1.040, grad_norm=113.468, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.483e-05, train_time=6.702 +[gpub007:0/64] 2023-07-10 08:53:27,886 (trainer:732) INFO: 31epoch:train:7501-7600batch: iter_time=1.200e-04, forward_time=0.144, loss_ctc=81.844, loss_att=63.816, acc=0.691, loss=69.224, backward_time=1.032, grad_norm=115.900, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.481e-05, train_time=2.757 +[gpub007:0/64] 2023-07-10 08:55:43,335 (trainer:732) INFO: 31epoch:train:7601-7700batch: iter_time=1.393e-04, forward_time=0.143, loss_ctc=75.429, loss_att=57.641, acc=0.689, loss=62.978, backward_time=1.022, grad_norm=105.578, clip=100.000, 
loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.480e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 08:57:58,931 (trainer:732) INFO: 31epoch:train:7701-7800batch: iter_time=1.332e-04, forward_time=0.144, loss_ctc=66.291, loss_att=50.761, acc=0.702, loss=55.420, backward_time=1.023, grad_norm=93.757, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.479e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:00:14,620 (trainer:732) INFO: 31epoch:train:7801-7900batch: iter_time=1.341e-04, forward_time=0.145, loss_ctc=68.084, loss_att=54.240, acc=0.722, loss=58.393, backward_time=1.024, grad_norm=100.937, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.478e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 09:02:30,275 (trainer:732) INFO: 31epoch:train:7901-8000batch: iter_time=1.378e-04, forward_time=0.144, loss_ctc=75.184, loss_att=61.215, acc=0.689, loss=65.405, backward_time=1.024, grad_norm=117.796, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.477e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 09:04:45,719 (trainer:732) INFO: 31epoch:train:8001-8100batch: iter_time=1.415e-04, forward_time=0.144, loss_ctc=74.700, loss_att=56.760, acc=0.718, loss=62.142, backward_time=1.024, grad_norm=102.300, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.476e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 09:07:01,307 (trainer:732) INFO: 31epoch:train:8101-8200batch: iter_time=1.358e-04, forward_time=0.145, loss_ctc=69.805, loss_att=51.574, acc=0.708, loss=57.043, backward_time=1.025, grad_norm=97.683, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.475e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:09:16,921 (trainer:732) INFO: 31epoch:train:8201-8300batch: iter_time=1.188e-04, forward_time=0.144, loss_ctc=80.779, loss_att=62.335, acc=0.703, loss=67.868, backward_time=1.027, grad_norm=122.167, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.474e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:10:01,629 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
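The combined loss reported in these trainer lines is consistent with a hybrid CTC/attention objective of the form loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3: for instance, 0.3 * 80.779 + 0.7 * 62.335 = 67.868, which matches the 8201-8300batch record above. A minimal sketch of that weighting, with the 0.3 weight inferred from the logged numbers rather than read from the training config:

    # Hypothetical check of the weighting; w = 0.3 is inferred from the logged values.
    def combined_loss(loss_ctc: float, loss_att: float, w: float = 0.3) -> float:
        return w * loss_ctc + (1.0 - w) * loss_att

    # Values taken from the 31epoch:train:8201-8300batch record above.
    assert abs(combined_loss(80.779, 62.335) - 67.868) < 1e-3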
+[gpub007:0/64] 2023-07-10 09:10:19,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 09:10:23,505 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 09:10:23,505 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-10 09:10:23,551 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 09:15:20,591 (trainer:732) INFO: 31epoch:train:8301-8400batch: iter_time=2.196, forward_time=0.193, loss_ctc=78.114, loss_att=57.819, acc=0.701, loss=63.908, backward_time=1.038, grad_norm=115.633, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.182, optim0_lr0=6.473e-05, train_time=7.273 +[gpub007:0/64] 2023-07-10 09:17:36,929 (trainer:732) INFO: 31epoch:train:8401-8500batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=79.033, loss_att=62.958, acc=0.703, loss=67.781, backward_time=1.026, grad_norm=125.948, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.472e-05, train_time=2.727 +[gpub007:0/64] 2023-07-10 09:19:53,048 (trainer:732) INFO: 31epoch:train:8501-8600batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=72.547, loss_att=54.310, acc=0.695, loss=59.781, backward_time=1.027, grad_norm=104.981, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.471e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 09:22:08,655 (trainer:732) INFO: 31epoch:train:8601-8700batch: iter_time=1.234e-04, forward_time=0.143, loss_ctc=66.497, loss_att=52.702, acc=0.720, loss=56.841, backward_time=1.024, grad_norm=113.915, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.470e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:24:24,118 (trainer:732) INFO: 31epoch:train:8701-8800batch: iter_time=1.177e-04, forward_time=0.143, loss_ctc=72.030, loss_att=56.814, acc=0.718, loss=61.379, backward_time=1.024, grad_norm=103.505, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.468e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 09:26:39,551 (trainer:732) INFO: 31epoch:train:8801-8900batch: iter_time=1.315e-04, forward_time=0.143, loss_ctc=73.158, loss_att=54.480, acc=0.717, loss=60.083, backward_time=1.023, grad_norm=123.351, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.467e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 09:28:55,177 (trainer:732) INFO: 31epoch:train:8901-9000batch: iter_time=1.291e-04, forward_time=0.143, loss_ctc=72.336, loss_att=60.053, acc=0.698, loss=63.738, backward_time=1.024, grad_norm=113.711, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.466e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:31:10,632 (trainer:732) INFO: 31epoch:train:9001-9100batch: iter_time=1.258e-04, forward_time=0.144, loss_ctc=75.572, loss_att=57.949, acc=0.714, loss=63.236, backward_time=1.023, grad_norm=120.305, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, 
optim0_lr0=6.465e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 09:32:41,587 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-10 09:32:59,831 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 09:33:03,249 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 09:33:03,249 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-10 09:33:03,256 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 09:37:30,226 (trainer:732) INFO: 31epoch:train:9101-9200batch: iter_time=1.257, forward_time=0.144, loss_ctc=80.434, loss_att=59.519, acc=0.706, loss=65.793, backward_time=1.033, grad_norm=119.357, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.464e-05, train_time=7.592 +[gpub007:0/64] 2023-07-10 09:39:47,466 (trainer:732) INFO: 31epoch:train:9201-9300batch: iter_time=1.592e-04, forward_time=0.144, loss_ctc=82.936, loss_att=65.707, acc=0.686, loss=70.875, backward_time=1.027, grad_norm=109.290, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.463e-05, train_time=2.745 +[gpub007:0/64] 2023-07-10 09:42:02,890 (trainer:732) INFO: 31epoch:train:9301-9400batch: iter_time=1.533e-04, forward_time=0.143, loss_ctc=72.261, loss_att=53.272, acc=0.683, loss=58.968, backward_time=1.023, grad_norm=132.530, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.462e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 09:44:18,793 (trainer:732) INFO: 31epoch:train:9401-9500batch: iter_time=1.449e-04, forward_time=0.143, loss_ctc=68.399, loss_att=55.461, acc=0.714, loss=59.342, backward_time=1.023, grad_norm=105.996, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.461e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 09:46:34,261 (trainer:732) INFO: 31epoch:train:9501-9600batch: iter_time=1.459e-04, forward_time=0.144, loss_ctc=67.609, loss_att=52.302, acc=0.721, loss=56.894, backward_time=1.024, grad_norm=99.282, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.460e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 09:48:49,605 (trainer:732) INFO: 31epoch:train:9601-9700batch: iter_time=1.457e-04, forward_time=0.144, loss_ctc=74.109, loss_att=57.686, acc=0.702, loss=62.613, backward_time=1.023, grad_norm=106.725, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.459e-05, train_time=2.707 +[gpub007:0/64] 2023-07-10 09:51:07,550 (trainer:732) INFO: 31epoch:train:9701-9800batch: iter_time=1.520e-04, forward_time=0.144, loss_ctc=71.197, loss_att=55.314, acc=0.709, loss=60.079, backward_time=1.024, grad_norm=104.266, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.458e-05, train_time=2.759 +[gpub007:0/64] 2023-07-10 09:53:35,541 (trainer:732) INFO: 31epoch:train:9801-9900batch: iter_time=1.495e-04, forward_time=0.144, loss_ctc=70.200, 
loss_att=53.040, acc=0.709, loss=58.188, backward_time=1.043, grad_norm=91.119, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.457e-05, train_time=2.960 +[gpub007:0/64] 2023-07-10 09:55:51,048 (trainer:732) INFO: 31epoch:train:9901-10000batch: iter_time=1.194e-04, forward_time=0.143, loss_ctc=84.235, loss_att=62.484, acc=0.700, loss=69.010, backward_time=1.024, grad_norm=125.985, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.455e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 10:09:21,822 (trainer:338) INFO: 31epoch results: [train] iter_time=0.188, forward_time=0.147, loss_ctc=74.729, loss_att=57.404, acc=0.706, loss=62.601, backward_time=1.028, grad_norm=110.689, clip=100.000, loss_scale=8.047e+26, optim_step_time=0.180, optim0_lr0=6.510e-05, train_time=3.369, time=4 hours, 40 minutes and 55.22 seconds, total_count=280000, gpu_max_cached_mem_GB=33.922, [valid] loss_ctc=46.086, cer_ctc=0.262, loss_att=39.649, acc=0.676, cer=0.383, wer=0.989, loss=41.580, time=7 minutes and 17.18 seconds, total_count=28842, gpu_max_cached_mem_GB=37.217, [att_plot] time=6 minutes and 2.16 seconds, total_count=0, gpu_max_cached_mem_GB=37.217 +[gpub007:0/64] 2023-07-10 10:09:37,640 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-10 10:09:37,785 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/16epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/26epoch.pth +[gpub007:0/64] 2023-07-10 10:09:37,785 (trainer:272) INFO: 32/50epoch started. Estimated time to finish: 3 days, 21 hours and 15 minutes +[gpub007:0/64] 2023-07-10 10:09:37,790 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
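Across this section the loss_scale column climbs in doublings, 6.190e+26 -> 1.238e+27 -> 2.476e+27 and onward, and each value is a power of two (6.190e+26 is 2^89, 1.238e+27 is 2^90). That pattern is characteristic of dynamic loss scaling in mixed-precision training, where the scale is doubled after a run of overflow-free steps and backed off on inf/nan gradients. A minimal sketch of one such step, assuming PyTorch's torch.cuda.amp.GradScaler with its default growth_factor=2.0, not ESPnet's exact trainer loop:

    # Sketch of a dynamically scaled mixed-precision step (assumes a CUDA device).
    import torch

    model = torch.nn.Linear(4, 2).cuda()                   # toy stand-in for the S2T model
    optimizer = torch.optim.Adam(model.parameters(), lr=2.5e-4)
    scaler = torch.cuda.amp.GradScaler(growth_factor=2.0)  # doubles the scale when stable

    x, y = torch.randn(8, 4).cuda(), torch.randn(8, 2).cuda()
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = torch.nn.functional.mse_loss(model(x), y)
    scaler.scale(loss).backward()   # backward runs on loss * loss_scale
    scaler.step(optimizer)          # unscales gradients; skips the step on inf/nan
    scaler.update()                 # grows or backs off the scale
    print(scaler.get_scale())       # the analogue of the loss_scale column above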
+[gpub007:0/64] 2023-07-10 10:09:55,388 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 10:09:58,777 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 10:09:58,777 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-10 10:09:58,783 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 10:15:01,640 (trainer:732) INFO: 32epoch:train:1-100batch: iter_time=1.814, forward_time=0.180, loss_ctc=82.775, loss_att=62.427, acc=0.694, loss=68.532, backward_time=1.042, grad_norm=129.186, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.184, optim0_lr0=6.454e-05, train_time=6.476 +[gpub007:0/64] 2023-07-10 10:17:18,393 (trainer:732) INFO: 32epoch:train:101-200batch: iter_time=1.226e-04, forward_time=0.146, loss_ctc=70.225, loss_att=55.701, acc=0.698, loss=60.058, backward_time=1.031, grad_norm=103.046, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.453e-05, train_time=2.735 +[gpub007:0/64] 2023-07-10 10:19:36,050 (trainer:732) INFO: 32epoch:train:201-300batch: iter_time=1.315e-04, forward_time=0.145, loss_ctc=69.830, loss_att=52.953, acc=0.705, loss=58.017, backward_time=1.035, grad_norm=112.692, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.452e-05, train_time=2.753 +[gpub007:0/64] 2023-07-10 10:21:55,852 (trainer:732) INFO: 32epoch:train:301-400batch: iter_time=1.219e-04, forward_time=0.146, loss_ctc=69.046, loss_att=51.545, acc=0.710, loss=56.796, backward_time=1.032, grad_norm=115.326, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.451e-05, train_time=2.796 +[gpub007:0/64] 2023-07-10 10:24:15,339 (trainer:732) INFO: 32epoch:train:401-500batch: iter_time=1.285e-04, forward_time=0.147, loss_ctc=72.322, loss_att=57.455, acc=0.704, loss=61.915, backward_time=1.034, grad_norm=102.002, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.450e-05, train_time=2.790 +[gpub007:0/64] 2023-07-10 10:26:34,466 (trainer:732) INFO: 32epoch:train:501-600batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=75.248, loss_att=53.922, acc=0.701, loss=60.320, backward_time=1.031, grad_norm=111.890, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.449e-05, train_time=2.782 +[gpub007:0/64] 2023-07-10 10:28:52,900 (trainer:732) INFO: 32epoch:train:601-700batch: iter_time=1.266e-04, forward_time=0.145, loss_ctc=59.986, loss_att=45.714, acc=0.701, loss=49.995, backward_time=1.029, grad_norm=97.759, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.448e-05, train_time=2.768 +[gpub007:0/64] 2023-07-10 10:31:08,940 (trainer:732) INFO: 32epoch:train:701-800batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=69.317, loss_att=53.875, acc=0.699, loss=58.507, backward_time=1.030, grad_norm=114.017, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.447e-05, 
train_time=2.721 +[gpub007:0/64] 2023-07-10 10:32:04,249 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-10 10:32:21,828 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 10:32:25,150 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 10:32:25,150 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-10 10:32:25,157 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 10:36:36,626 (trainer:732) INFO: 32epoch:train:801-900batch: iter_time=1.817, forward_time=0.170, loss_ctc=79.801, loss_att=65.876, acc=0.693, loss=70.053, backward_time=1.042, grad_norm=145.894, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.184, optim0_lr0=6.446e-05, train_time=6.553 +[gpub007:0/64] 2023-07-10 10:38:53,804 (trainer:732) INFO: 32epoch:train:901-1000batch: iter_time=1.214e-04, forward_time=0.148, loss_ctc=73.607, loss_att=56.551, acc=0.700, loss=61.668, backward_time=1.032, grad_norm=114.270, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.445e-05, train_time=2.744 +[gpub007:0/64] 2023-07-10 10:41:09,840 (trainer:732) INFO: 32epoch:train:1001-1100batch: iter_time=1.209e-04, forward_time=0.147, loss_ctc=69.558, loss_att=52.276, acc=0.710, loss=57.461, backward_time=1.030, grad_norm=110.727, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.444e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 10:43:25,512 (trainer:732) INFO: 32epoch:train:1101-1200batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=68.257, loss_att=50.284, acc=0.712, loss=55.676, backward_time=1.028, grad_norm=99.052, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.443e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 10:45:41,637 (trainer:732) INFO: 32epoch:train:1201-1300batch: iter_time=1.417e-04, forward_time=0.147, loss_ctc=73.110, loss_att=59.212, acc=0.706, loss=63.382, backward_time=1.031, grad_norm=103.008, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.442e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 10:47:57,371 (trainer:732) INFO: 32epoch:train:1301-1400batch: iter_time=1.430e-04, forward_time=0.146, loss_ctc=73.637, loss_att=52.926, acc=0.709, loss=59.139, backward_time=1.029, grad_norm=114.534, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.440e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 10:50:12,842 (trainer:732) INFO: 32epoch:train:1401-1500batch: iter_time=1.489e-04, forward_time=0.146, loss_ctc=59.377, loss_att=45.652, acc=0.704, loss=49.769, backward_time=1.026, grad_norm=93.770, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.439e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 10:52:28,905 (trainer:732) INFO: 32epoch:train:1501-1600batch: iter_time=1.378e-04, forward_time=0.148, loss_ctc=66.884, loss_att=52.542, acc=0.706, loss=56.845, 
backward_time=1.030, grad_norm=106.371, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.438e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 10:54:07,460 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-10 10:54:25,438 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 10:54:28,896 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 10:54:28,896 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-10 10:54:28,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 10:57:20,848 (trainer:732) INFO: 32epoch:train:1601-1700batch: iter_time=1.428, forward_time=0.147, loss_ctc=77.393, loss_att=63.046, acc=0.702, loss=67.350, backward_time=1.042, grad_norm=142.975, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.437e-05, train_time=5.839 +[gpub007:0/64] 2023-07-10 10:59:37,739 (trainer:732) INFO: 32epoch:train:1701-1800batch: iter_time=1.210e-04, forward_time=0.147, loss_ctc=78.024, loss_att=60.530, acc=0.692, loss=65.778, backward_time=1.033, grad_norm=110.661, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.436e-05, train_time=2.738 +[gpub007:0/64] 2023-07-10 11:01:53,900 (trainer:732) INFO: 32epoch:train:1801-1900batch: iter_time=1.239e-04, forward_time=0.148, loss_ctc=71.371, loss_att=54.012, acc=0.713, loss=59.220, backward_time=1.030, grad_norm=100.104, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.435e-05, train_time=2.723 +[gpub007:0/64] 2023-07-10 11:04:09,992 (trainer:732) INFO: 32epoch:train:1901-2000batch: iter_time=1.372e-04, forward_time=0.148, loss_ctc=67.395, loss_att=47.049, acc=0.715, loss=53.153, backward_time=1.029, grad_norm=104.062, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.434e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 11:06:26,406 (trainer:732) INFO: 32epoch:train:2001-2100batch: iter_time=1.225e-04, forward_time=0.149, loss_ctc=69.227, loss_att=55.455, acc=0.705, loss=59.586, backward_time=1.033, grad_norm=113.900, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.433e-05, train_time=2.728 +[gpub007:0/64] 2023-07-10 11:08:42,299 (trainer:732) INFO: 32epoch:train:2101-2200batch: iter_time=1.217e-04, forward_time=0.147, loss_ctc=74.120, loss_att=54.366, acc=0.708, loss=60.292, backward_time=1.029, grad_norm=106.641, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.432e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 11:10:58,128 (trainer:732) INFO: 32epoch:train:2201-2300batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=63.109, loss_att=45.315, acc=0.717, loss=50.653, backward_time=1.027, grad_norm=99.928, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.431e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 11:13:14,254 (trainer:732) INFO: 
32epoch:train:2301-2400batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=64.608, loss_att=49.717, acc=0.706, loss=54.185, backward_time=1.030, grad_norm=105.393, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.430e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 11:15:30,280 (trainer:732) INFO: 32epoch:train:2401-2500batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=75.807, loss_att=62.505, acc=0.697, loss=66.495, backward_time=1.029, grad_norm=115.518, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.429e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 11:15:32,738 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-10 11:15:50,744 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 11:15:54,388 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 11:15:54,388 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-10 11:15:54,395 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 11:21:21,406 (trainer:732) INFO: 32epoch:train:2501-2600batch: iter_time=1.212, forward_time=0.146, loss_ctc=77.847, loss_att=58.631, acc=0.703, loss=64.396, backward_time=1.043, grad_norm=128.562, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.428e-05, train_time=7.022 +[gpub007:0/64] 2023-07-10 11:23:38,061 (trainer:732) INFO: 32epoch:train:2601-2700batch: iter_time=1.351e-04, forward_time=0.147, loss_ctc=72.761, loss_att=54.966, acc=0.706, loss=60.305, backward_time=1.030, grad_norm=118.684, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.427e-05, train_time=2.733 +[gpub007:0/64] 2023-07-10 11:25:53,614 (trainer:732) INFO: 32epoch:train:2701-2800batch: iter_time=1.263e-04, forward_time=0.145, loss_ctc=69.898, loss_att=52.190, acc=0.709, loss=57.503, backward_time=1.027, grad_norm=111.867, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.426e-05, train_time=2.711 +[gpub007:0/64] 2023-07-10 11:28:09,312 (trainer:732) INFO: 32epoch:train:2801-2900batch: iter_time=1.317e-04, forward_time=0.146, loss_ctc=68.982, loss_att=50.475, acc=0.714, loss=56.027, backward_time=1.027, grad_norm=97.441, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.424e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 11:30:25,389 (trainer:732) INFO: 32epoch:train:2901-3000batch: iter_time=1.381e-04, forward_time=0.147, loss_ctc=71.362, loss_att=57.252, acc=0.708, loss=61.485, backward_time=1.029, grad_norm=125.606, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.423e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 11:32:41,218 (trainer:732) INFO: 32epoch:train:3001-3100batch: iter_time=1.400e-04, forward_time=0.146, loss_ctc=73.432, loss_att=52.328, acc=0.709, loss=58.659, backward_time=1.028, grad_norm=109.991, clip=100.000, 
loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.422e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 11:34:56,587 (trainer:732) INFO: 32epoch:train:3101-3200batch: iter_time=1.361e-04, forward_time=0.146, loss_ctc=59.109, loss_att=45.449, acc=0.707, loss=49.547, backward_time=1.024, grad_norm=98.481, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.421e-05, train_time=2.707 +[gpub007:0/64] 2023-07-10 11:37:42,503 (trainer:732) INFO: 32epoch:train:3201-3300batch: iter_time=0.002, forward_time=0.155, loss_ctc=69.064, loss_att=52.243, acc=0.707, loss=57.290, backward_time=1.098, grad_norm=102.694, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.184, optim0_lr0=6.420e-05, train_time=3.318 +[gpub007:0/64] 2023-07-10 11:38:35,709 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub007:0/64] 2023-07-10 11:38:53,533 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 11:38:56,971 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 11:38:56,971 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-10 11:38:57,040 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 11:43:38,389 (trainer:732) INFO: 32epoch:train:3301-3400batch: iter_time=1.723, forward_time=0.161, loss_ctc=72.328, loss_att=59.143, acc=0.711, loss=63.098, backward_time=1.048, grad_norm=114.283, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.419e-05, train_time=7.117 +[gpub007:0/64] 2023-07-10 11:45:55,352 (trainer:732) INFO: 32epoch:train:3401-3500batch: iter_time=1.093e-04, forward_time=0.148, loss_ctc=76.344, loss_att=61.000, acc=0.693, loss=65.603, backward_time=1.032, grad_norm=126.536, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.418e-05, train_time=2.739 +[gpub007:0/64] 2023-07-10 11:48:11,190 (trainer:732) INFO: 32epoch:train:3501-3600batch: iter_time=1.021e-04, forward_time=0.146, loss_ctc=69.903, loss_att=53.202, acc=0.714, loss=58.212, backward_time=1.028, grad_norm=110.976, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.417e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 11:50:26,933 (trainer:732) INFO: 32epoch:train:3601-3700batch: iter_time=1.107e-04, forward_time=0.146, loss_ctc=66.324, loss_att=46.756, acc=0.720, loss=52.626, backward_time=1.028, grad_norm=97.591, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.416e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 11:52:43,212 (trainer:732) INFO: 32epoch:train:3701-3800batch: iter_time=1.318e-04, forward_time=0.147, loss_ctc=70.566, loss_att=55.587, acc=0.712, loss=60.081, backward_time=1.031, grad_norm=103.444, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.415e-05, train_time=2.725 +[gpub007:0/64] 2023-07-10 11:54:59,415 (trainer:732) INFO: 32epoch:train:3801-3900batch: iter_time=1.432e-04, 
forward_time=0.148, loss_ctc=71.701, loss_att=53.401, acc=0.713, loss=58.891, backward_time=1.029, grad_norm=132.687, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.414e-05, train_time=2.724 +[gpub007:0/64] 2023-07-10 11:57:15,042 (trainer:732) INFO: 32epoch:train:3901-4000batch: iter_time=1.482e-04, forward_time=0.146, loss_ctc=62.956, loss_att=46.177, acc=0.713, loss=51.211, backward_time=1.027, grad_norm=131.080, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.413e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 11:59:30,462 (trainer:732) INFO: 32epoch:train:4001-4100batch: iter_time=1.452e-04, forward_time=0.145, loss_ctc=65.052, loss_att=51.279, acc=0.705, loss=55.411, backward_time=1.025, grad_norm=121.887, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.412e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 12:01:02,920 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-10 12:01:21,167 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 12:01:24,596 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 12:01:24,596 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-10 12:01:24,603 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 12:06:08,873 (trainer:732) INFO: 32epoch:train:4101-4200batch: iter_time=1.328, forward_time=0.146, loss_ctc=73.547, loss_att=59.674, acc=0.699, loss=63.836, backward_time=1.039, grad_norm=122.563, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.411e-05, train_time=7.968 +[gpub007:0/64] 2023-07-10 12:08:25,367 (trainer:732) INFO: 32epoch:train:4201-4300batch: iter_time=1.381e-04, forward_time=0.146, loss_ctc=76.839, loss_att=57.023, acc=0.705, loss=62.968, backward_time=1.031, grad_norm=125.391, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.410e-05, train_time=2.730 +[gpub007:0/64] 2023-07-10 12:10:41,236 (trainer:732) INFO: 32epoch:train:4301-4400batch: iter_time=1.364e-04, forward_time=0.144, loss_ctc=73.277, loss_att=58.712, acc=0.699, loss=63.081, backward_time=1.029, grad_norm=115.620, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.409e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 12:12:57,019 (trainer:732) INFO: 32epoch:train:4401-4500batch: iter_time=1.328e-04, forward_time=0.145, loss_ctc=67.384, loss_att=46.579, acc=0.720, loss=52.821, backward_time=1.027, grad_norm=108.017, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.408e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 12:15:12,654 (trainer:732) INFO: 32epoch:train:4501-4600batch: iter_time=1.333e-04, forward_time=0.145, loss_ctc=68.630, loss_att=53.059, acc=0.714, loss=57.730, backward_time=1.026, grad_norm=107.417, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.407e-05, 
train_time=2.712 +[gpub007:0/64] 2023-07-10 12:17:28,644 (trainer:732) INFO: 32epoch:train:4601-4700batch: iter_time=1.533e-04, forward_time=0.146, loss_ctc=73.425, loss_att=58.843, acc=0.699, loss=63.218, backward_time=1.029, grad_norm=116.945, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.405e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 12:19:44,101 (trainer:732) INFO: 32epoch:train:4701-4800batch: iter_time=1.360e-04, forward_time=0.145, loss_ctc=64.127, loss_att=46.817, acc=0.711, loss=52.010, backward_time=1.025, grad_norm=111.013, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.404e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 12:21:59,491 (trainer:732) INFO: 32epoch:train:4801-4900batch: iter_time=1.368e-04, forward_time=0.144, loss_ctc=60.258, loss_att=46.765, acc=0.703, loss=50.813, backward_time=1.026, grad_norm=106.045, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.403e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 12:24:15,527 (trainer:732) INFO: 32epoch:train:4901-5000batch: iter_time=1.372e-04, forward_time=0.146, loss_ctc=70.694, loss_att=60.402, acc=0.692, loss=63.489, backward_time=1.029, grad_norm=96.616, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.402e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 12:24:16,949 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-10 12:24:35,286 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 12:24:38,751 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 12:24:38,751 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-10 12:24:38,757 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 12:31:22,822 (trainer:732) INFO: 32epoch:train:5001-5100batch: iter_time=1.206, forward_time=0.146, loss_ctc=78.031, loss_att=58.243, acc=0.700, loss=64.179, backward_time=1.046, grad_norm=118.668, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.401e-05, train_time=8.546 +[gpub007:0/64] 2023-07-10 12:33:39,075 (trainer:732) INFO: 32epoch:train:5101-5200batch: iter_time=1.075e-04, forward_time=0.145, loss_ctc=70.920, loss_att=54.269, acc=0.702, loss=59.264, backward_time=1.030, grad_norm=98.155, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.400e-05, train_time=2.725 +[gpub007:0/64] 2023-07-10 12:35:55,015 (trainer:732) INFO: 32epoch:train:5201-5300batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=69.300, loss_att=51.324, acc=0.714, loss=56.717, backward_time=1.028, grad_norm=100.654, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.399e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 12:38:10,958 (trainer:732) INFO: 32epoch:train:5301-5400batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=67.564, loss_att=49.426, acc=0.714, 
loss=54.867, backward_time=1.027, grad_norm=110.512, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.398e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 12:40:27,203 (trainer:732) INFO: 32epoch:train:5401-5500batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=71.342, loss_att=58.549, acc=0.706, loss=62.387, backward_time=1.029, grad_norm=106.432, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.397e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 12:42:42,776 (trainer:732) INFO: 32epoch:train:5501-5600batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=74.444, loss_att=52.461, acc=0.705, loss=59.055, backward_time=1.026, grad_norm=109.611, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.396e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 12:44:58,577 (trainer:732) INFO: 32epoch:train:5601-5700batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=58.589, loss_att=43.738, acc=0.712, loss=48.193, backward_time=1.027, grad_norm=92.158, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.395e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 12:47:13,794 (trainer:732) INFO: 32epoch:train:5701-5800batch: iter_time=1.385e-04, forward_time=0.143, loss_ctc=69.496, loss_att=52.929, acc=0.699, loss=57.899, backward_time=1.024, grad_norm=111.627, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.394e-05, train_time=2.704
+[gpub007:0/64] 2023-07-10 12:48:02,338 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub007:0/64] 2023-07-10 12:48:20,314 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 12:48:23,726 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 12:48:23,726 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub007:0/64] 2023-07-10 12:48:23,732 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 12:53:10,280 (trainer:732) INFO: 32epoch:train:5801-5900batch: iter_time=1.251, forward_time=0.145, loss_ctc=75.374, loss_att=62.102, acc=0.695, loss=66.084, backward_time=1.042, grad_norm=129.946, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.393e-05, train_time=7.130
+[gpub007:0/64] 2023-07-10 12:55:26,949 (trainer:732) INFO: 32epoch:train:5901-6000batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=71.717, loss_att=54.986, acc=0.703, loss=60.005, backward_time=1.031, grad_norm=113.340, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.392e-05, train_time=2.733
+[gpub007:0/64] 2023-07-10 12:57:42,683 (trainer:732) INFO: 32epoch:train:6001-6100batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=69.446, loss_att=51.857, acc=0.709, loss=57.133, backward_time=1.028, grad_norm=107.204, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.391e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 12:59:58,269 (trainer:732) INFO: 32epoch:train:6101-6200batch: iter_time=1.363e-04, forward_time=0.145, loss_ctc=66.951, loss_att=48.706, acc=0.715, loss=54.179, backward_time=1.027, grad_norm=103.728, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.390e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 13:02:14,191 (trainer:732) INFO: 32epoch:train:6201-6300batch: iter_time=1.396e-04, forward_time=0.146, loss_ctc=71.430, loss_att=59.193, acc=0.709, loss=62.864, backward_time=1.029, grad_norm=102.881, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.389e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 13:04:29,947 (trainer:732) INFO: 32epoch:train:6301-6400batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=72.094, loss_att=52.015, acc=0.704, loss=58.039, backward_time=1.027, grad_norm=119.327, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.388e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 13:06:45,375 (trainer:732) INFO: 32epoch:train:6401-6500batch: iter_time=1.612e-04, forward_time=0.146, loss_ctc=59.809, loss_att=45.554, acc=0.702, loss=49.831, backward_time=1.027, grad_norm=100.219, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.387e-05, train_time=2.708
+[gpub007:0/64] 2023-07-10 13:09:01,119 (trainer:732) INFO: 32epoch:train:6501-6600batch: iter_time=1.443e-04, forward_time=0.146, loss_ctc=66.079, loss_att=52.255, acc=0.696, loss=56.402, backward_time=1.025, grad_norm=113.589, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.386e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 13:10:33,347 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub007:0/64] 2023-07-10 13:10:51,695 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 13:10:55,130 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 13:10:55,130 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub007:0/64] 2023-07-10 13:10:55,136 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 13:14:27,555 (trainer:732) INFO: 32epoch:train:6601-6700batch: iter_time=1.221, forward_time=0.147, loss_ctc=75.588, loss_att=61.507, acc=0.698, loss=65.731, backward_time=1.044, grad_norm=117.004, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.385e-05, train_time=6.528
+[gpub007:0/64] 2023-07-10 13:16:44,594 (trainer:732) INFO: 32epoch:train:6701-6800batch: iter_time=1.203e-04, forward_time=0.147, loss_ctc=76.539, loss_att=59.523, acc=0.689, loss=64.628, backward_time=1.032, grad_norm=108.854, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.384e-05, train_time=2.741
+[gpub007:0/64] 2023-07-10 13:19:00,544 (trainer:732) INFO: 32epoch:train:6801-6900batch: iter_time=1.254e-04, forward_time=0.148, loss_ctc=69.941, loss_att=54.051, acc=0.712, loss=58.818, backward_time=1.030, grad_norm=105.180, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.382e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 13:21:16,238 (trainer:732) INFO: 32epoch:train:6901-7000batch: iter_time=1.211e-04, forward_time=0.147, loss_ctc=66.098, loss_att=47.072, acc=0.717, loss=52.780, backward_time=1.028, grad_norm=117.263, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.381e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 13:23:32,091 (trainer:732) INFO: 32epoch:train:7001-7100batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=69.137, loss_att=57.624, acc=0.702, loss=61.078, backward_time=1.029, grad_norm=111.250, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.380e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 13:25:47,876 (trainer:732) INFO: 32epoch:train:7101-7200batch: iter_time=1.346e-04, forward_time=0.147, loss_ctc=72.417, loss_att=53.978, acc=0.710, loss=59.509, backward_time=1.028, grad_norm=108.501, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.379e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 13:28:03,438 (trainer:732) INFO: 32epoch:train:7201-7300batch: iter_time=1.262e-04, forward_time=0.147, loss_ctc=61.759, loss_att=45.059, acc=0.713, loss=50.069, backward_time=1.027, grad_norm=92.099, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.378e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 13:30:19,184 (trainer:732) INFO: 32epoch:train:7301-7400batch: iter_time=1.197e-04, forward_time=0.147, loss_ctc=62.772, loss_att=49.481, acc=0.702, loss=53.468, backward_time=1.028, grad_norm=103.313, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.377e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 13:32:34,883 (trainer:732) INFO: 32epoch:train:7401-7500batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=72.987, loss_att=60.960, acc=0.690, loss=64.568, backward_time=1.027, grad_norm=99.943, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.376e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 13:32:40,288 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub007:0/64] 2023-07-10 13:32:58,275 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 13:33:01,846 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 13:33:01,846 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub007:0/64] 2023-07-10 13:33:01,852 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 13:38:46,434 (trainer:732) INFO: 32epoch:train:7501-7600batch: iter_time=1.307, forward_time=0.147, loss_ctc=75.299, loss_att=57.418, acc=0.701, loss=62.782, backward_time=1.041, grad_norm=137.132, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.375e-05, train_time=7.431
+[gpub007:0/64] 2023-07-10 13:41:02,707 (trainer:732) INFO: 32epoch:train:7601-7700batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=71.877, loss_att=54.967, acc=0.708, loss=60.040, backward_time=1.029, grad_norm=104.384, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.374e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 13:43:18,360 (trainer:732) INFO: 32epoch:train:7701-7800batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=67.383, loss_att=50.826, acc=0.711, loss=55.793, backward_time=1.027, grad_norm=109.530, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.373e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 13:45:33,983 (trainer:732) INFO: 32epoch:train:7801-7900batch: iter_time=1.142e-04, forward_time=0.145, loss_ctc=68.067, loss_att=49.300, acc=0.717, loss=54.930, backward_time=1.027, grad_norm=104.781, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.372e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 13:47:49,561 (trainer:732) INFO: 32epoch:train:7901-8000batch: iter_time=1.144e-04, forward_time=0.144, loss_ctc=72.664, loss_att=58.684, acc=0.706, loss=62.878, backward_time=1.026, grad_norm=118.286, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.371e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 13:50:08,812 (trainer:732) INFO: 32epoch:train:8001-8100batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=73.128, loss_att=52.237, acc=0.702, loss=58.505, backward_time=1.041, grad_norm=113.324, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.370e-05, train_time=2.785
+[gpub007:0/64] 2023-07-10 13:52:24,009 (trainer:732) INFO: 32epoch:train:8101-8200batch: iter_time=1.130e-04, forward_time=0.143, loss_ctc=58.571, loss_att=43.958, acc=0.708, loss=48.342, backward_time=1.025, grad_norm=93.754, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.369e-05, train_time=2.704
+[gpub007:0/64] 2023-07-10 13:54:53,526 (trainer:732) INFO: 32epoch:train:8201-8300batch: iter_time=1.266e-04, forward_time=0.154, loss_ctc=66.214, loss_att=51.471, acc=0.702, loss=55.894, backward_time=1.042, grad_norm=108.612, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.368e-05, train_time=2.990
+[gpub007:0/64] 2023-07-10 13:55:44,899 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub007:0/64] 2023-07-10 13:56:03,257 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 13:56:06,962 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 13:56:06,962 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-10 13:56:06,968 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 14:00:18,628 (trainer:732) INFO: 32epoch:train:8301-8400batch: iter_time=1.336, forward_time=0.146, loss_ctc=71.560, loss_att=56.880, acc=0.708, loss=61.284, backward_time=1.046, grad_norm=102.825, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.367e-05, train_time=6.502
+[gpub007:0/64] 2023-07-10 14:02:35,182 (trainer:732) INFO: 32epoch:train:8401-8500batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=75.128, loss_att=59.116, acc=0.694, loss=63.920, backward_time=1.029, grad_norm=112.125, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.366e-05, train_time=2.731
+[gpub007:0/64] 2023-07-10 14:04:51,087 (trainer:732) INFO: 32epoch:train:8501-8600batch: iter_time=1.232e-04, forward_time=0.145, loss_ctc=69.914, loss_att=51.710, acc=0.714, loss=57.171, backward_time=1.028, grad_norm=108.415, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.365e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 14:07:06,663 (trainer:732) INFO: 32epoch:train:8601-8700batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=66.599, loss_att=46.353, acc=0.723, loss=52.427, backward_time=1.026, grad_norm=94.420, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.364e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 14:09:22,548 (trainer:732) INFO: 32epoch:train:8701-8800batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=69.481, loss_att=57.050, acc=0.703, loss=60.780, backward_time=1.028, grad_norm=100.663, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.363e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 14:11:38,476 (trainer:732) INFO: 32epoch:train:8801-8900batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=72.168, loss_att=53.037, acc=0.711, loss=58.776, backward_time=1.028, grad_norm=122.336, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.362e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 14:13:54,152 (trainer:732) INFO: 32epoch:train:8901-9000batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=61.344, loss_att=44.963, acc=0.711, loss=49.877, backward_time=1.026, grad_norm=95.392, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.361e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 14:16:09,760 (trainer:732) INFO: 32epoch:train:9001-9100batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=62.596, loss_att=49.119, acc=0.703, loss=53.162, backward_time=1.026, grad_norm=96.887, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.360e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 14:17:42,042 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub007:0/64] 2023-07-10 14:17:59,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 14:18:03,187 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 14:18:03,187 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub007:0/64] 2023-07-10 14:18:03,194 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 14:23:03,903 (trainer:732) INFO: 32epoch:train:9101-9200batch: iter_time=1.226, forward_time=0.145, loss_ctc=73.144, loss_att=60.697, acc=0.696, loss=64.431, backward_time=1.038, grad_norm=108.879, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.359e-05, train_time=8.283
+[gpub007:0/64] 2023-07-10 14:25:21,227 (trainer:732) INFO: 32epoch:train:9201-9300batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=74.656, loss_att=56.639, acc=0.711, loss=62.044, backward_time=1.034, grad_norm=111.073, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.358e-05, train_time=2.746
+[gpub007:0/64] 2023-07-10 14:27:37,722 (trainer:732) INFO: 32epoch:train:9301-9400batch: iter_time=1.122e-04, forward_time=0.147, loss_ctc=71.766, loss_att=57.666, acc=0.712, loss=61.896, backward_time=1.030, grad_norm=109.025, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.357e-05, train_time=2.730
+[gpub007:0/64] 2023-07-10 14:29:53,547 (trainer:732) INFO: 32epoch:train:9401-9500batch: iter_time=1.220e-04, forward_time=0.146, loss_ctc=66.631, loss_att=45.648, acc=0.727, loss=51.943, backward_time=1.027, grad_norm=96.909, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.356e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 14:32:09,285 (trainer:732) INFO: 32epoch:train:9501-9600batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=68.474, loss_att=53.078, acc=0.720, loss=57.697, backward_time=1.028, grad_norm=104.949, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.355e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 14:34:25,158 (trainer:732) INFO: 32epoch:train:9601-9700batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=72.926, loss_att=58.313, acc=0.697, loss=62.697, backward_time=1.029, grad_norm=125.061, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.354e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 14:36:40,674 (trainer:732) INFO: 32epoch:train:9701-9800batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=65.064, loss_att=46.459, acc=0.723, loss=52.040, backward_time=1.026, grad_norm=110.940, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.353e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 14:38:56,247 (trainer:732) INFO: 32epoch:train:9801-9900batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=60.098, loss_att=45.816, acc=0.712, loss=50.101, backward_time=1.027, grad_norm=113.480, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.352e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 14:41:12,363 (trainer:732) INFO: 32epoch:train:9901-10000batch: iter_time=1.388e-04, forward_time=0.146, loss_ctc=68.428, loss_att=58.130, acc=0.710, loss=61.220, backward_time=1.030, grad_norm=117.880, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.181, optim0_lr0=6.350e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 14:54:12,519 (trainer:338) INFO: 32epoch results: [train] iter_time=0.169, forward_time=0.147, loss_ctc=69.828, loss_att=53.553, acc=0.706, loss=58.435, backward_time=1.031, grad_norm=110.367, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.402e-05, train_time=3.259, time=4 hours, 31 minutes and 47.4 seconds, total_count=290000, gpu_max_cached_mem_GB=37.217, [valid] loss_ctc=47.779, cer_ctc=0.273, loss_att=40.205, acc=0.660, cer=0.433, wer=1.000, loss=42.477, time=6 minutes and 52.25 seconds, total_count=29854, gpu_max_cached_mem_GB=37.217, [att_plot] time=5 minutes and 55.08 seconds, total_count=0, gpu_max_cached_mem_GB=37.217
+[gpub007:0/64] 2023-07-10 14:54:30,700 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub007:0/64] 2023-07-10 14:54:30,707 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/27epoch.pth
+[gpub007:0/64] 2023-07-10 14:54:30,708 (trainer:272) INFO: 33/50epoch started. Estimated time to finish: 3 days, 14 hours and 54 minutes
+[gpub007:0/64] 2023-07-10 14:54:31,356 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub007:0/64] 2023-07-10 14:54:48,831 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 14:54:52,265 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 14:54:52,265 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub007:0/64] 2023-07-10 14:54:52,377 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 15:01:51,026 (trainer:732) INFO: 33epoch:train:1-100batch: iter_time=2.982, forward_time=0.171, loss_ctc=81.309, loss_att=58.949, acc=0.687, loss=65.657, backward_time=1.042, grad_norm=140.517, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.185, optim0_lr0=6.349e-05, train_time=8.795
+[gpub007:0/64] 2023-07-10 15:04:07,486 (trainer:732) INFO: 33epoch:train:101-200batch: iter_time=1.085e-04, forward_time=0.146, loss_ctc=80.757, loss_att=58.329, acc=0.689, loss=65.057, backward_time=1.030, grad_norm=121.535, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.348e-05, train_time=2.729
+[gpub007:0/64] 2023-07-10 15:06:23,888 (trainer:732) INFO: 33epoch:train:201-300batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=70.150, loss_att=53.451, acc=0.706, loss=58.460, backward_time=1.029, grad_norm=94.366, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.347e-05, train_time=2.728
+[gpub007:0/64] 2023-07-10 15:08:45,416 (trainer:732) INFO: 33epoch:train:301-400batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=74.947, loss_att=53.595, acc=0.726, loss=60.001, backward_time=1.035, grad_norm=145.374, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.346e-05, train_time=2.830
+[gpub007:0/64] 2023-07-10 15:11:00,955 (trainer:732) INFO: 33epoch:train:401-500batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=72.597, loss_att=50.951, acc=0.715, loss=57.445, backward_time=1.027, grad_norm=162.304, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.345e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 15:13:16,840 (trainer:732) INFO: 33epoch:train:501-600batch: iter_time=1.336e-04, forward_time=0.146, loss_ctc=71.856, loss_att=56.265, acc=0.708, loss=60.942, backward_time=1.028, grad_norm=113.889, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.344e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 15:15:32,498 (trainer:732) INFO: 33epoch:train:601-700batch: iter_time=1.299e-04, forward_time=0.146, loss_ctc=71.532, loss_att=49.708, acc=0.713, loss=56.256, backward_time=1.027, grad_norm=120.484, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.343e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 15:17:49,292 (trainer:732) INFO: 33epoch:train:701-800batch: iter_time=1.353e-04, forward_time=0.146, loss_ctc=58.765, loss_att=44.584, acc=0.702, loss=48.839, backward_time=1.030, grad_norm=96.075, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.342e-05, train_time=2.736
+[gpub007:0/64] 2023-07-10 15:18:41,780 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub007:0/64] 2023-07-10 15:18:59,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 15:19:02,787 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 15:19:02,787 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub007:0/64] 2023-07-10 15:19:02,793 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 15:24:32,340 (trainer:732) INFO: 33epoch:train:801-900batch: iter_time=1.379, forward_time=0.167, loss_ctc=77.054, loss_att=59.093, acc=0.692, loss=64.482, backward_time=1.039, grad_norm=110.763, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.341e-05, train_time=8.061
+[gpub007:0/64] 2023-07-10 15:26:52,029 (trainer:732) INFO: 33epoch:train:901-1000batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=83.596, loss_att=56.990, acc=0.690, loss=64.972, backward_time=1.033, grad_norm=141.577, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.340e-05, train_time=2.794
+[gpub007:0/64] 2023-07-10 15:29:08,446 (trainer:732) INFO: 33epoch:train:1001-1100batch: iter_time=1.392e-04, forward_time=0.148, loss_ctc=68.429, loss_att=55.307, acc=0.702, loss=59.244, backward_time=1.030, grad_norm=126.627, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.339e-05, train_time=2.728
+[gpub007:0/64] 2023-07-10 15:31:24,327 (trainer:732) INFO: 33epoch:train:1101-1200batch: iter_time=1.288e-04, forward_time=0.147, loss_ctc=73.355, loss_att=52.152, acc=0.723, loss=58.512, backward_time=1.028, grad_norm=115.131, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.338e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 15:33:39,989 (trainer:732) INFO: 33epoch:train:1201-1300batch: iter_time=1.435e-04, forward_time=0.147, loss_ctc=64.075, loss_att=43.059, acc=0.728, loss=49.364, backward_time=1.028, grad_norm=95.466, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.337e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 15:35:55,804 (trainer:732) INFO: 33epoch:train:1301-1400batch: iter_time=1.343e-04, forward_time=0.147, loss_ctc=73.302, loss_att=56.271, acc=0.716, loss=61.380, backward_time=1.028, grad_norm=119.694, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.336e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 15:38:11,410 (trainer:732) INFO: 33epoch:train:1401-1500batch: iter_time=1.321e-04, forward_time=0.147, loss_ctc=72.741, loss_att=52.143, acc=0.714, loss=58.322, backward_time=1.027, grad_norm=107.157, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.335e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 15:40:26,947 (trainer:732) INFO: 33epoch:train:1501-1600batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=60.881, loss_att=46.840, acc=0.709, loss=51.052, backward_time=1.027, grad_norm=111.445, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.334e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 15:42:00,968 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub007:0/64] 2023-07-10 15:42:19,254 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 15:42:22,697 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 15:42:22,698 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub007:0/64] 2023-07-10 15:42:22,704 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 15:45:39,799 (trainer:732) INFO: 33epoch:train:1601-1700batch: iter_time=1.636, forward_time=0.149, loss_ctc=70.667, loss_att=54.373, acc=0.686, loss=59.262, backward_time=1.039, grad_norm=100.782, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.333e-05, train_time=6.257
+[gpub007:0/64] 2023-07-10 15:47:56,440 (trainer:732) INFO: 33epoch:train:1701-1800batch: iter_time=1.377e-04, forward_time=0.146, loss_ctc=82.484, loss_att=57.215, acc=0.680, loss=64.796, backward_time=1.030, grad_norm=122.345, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.332e-05, train_time=2.733
+[gpub007:0/64] 2023-07-10 15:50:12,354 (trainer:732) INFO: 33epoch:train:1801-1900batch: iter_time=1.406e-04, forward_time=0.146, loss_ctc=68.883, loss_att=56.964, acc=0.689, loss=60.540, backward_time=1.028, grad_norm=114.633, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.331e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 15:52:28,082 (trainer:732) INFO: 33epoch:train:1901-2000batch: iter_time=1.456e-04, forward_time=0.147, loss_ctc=72.270, loss_att=54.885, acc=0.713, loss=60.101, backward_time=1.028, grad_norm=111.059, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.330e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 15:54:43,802 (trainer:732) INFO: 33epoch:train:2001-2100batch: iter_time=1.321e-04, forward_time=0.146, loss_ctc=65.468, loss_att=43.872, acc=0.718, loss=50.351, backward_time=1.027, grad_norm=91.409, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.329e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 15:56:59,983 (trainer:732) INFO: 33epoch:train:2101-2200batch: iter_time=1.069e-04, forward_time=0.147, loss_ctc=70.783, loss_att=54.494, acc=0.716, loss=59.380, backward_time=1.029, grad_norm=105.521, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.328e-05, train_time=2.723
+[gpub007:0/64] 2023-07-10 15:59:15,922 (trainer:732) INFO: 33epoch:train:2201-2300batch: iter_time=1.109e-04, forward_time=0.146, loss_ctc=73.796, loss_att=53.110, acc=0.708, loss=59.316, backward_time=1.028, grad_norm=106.896, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.327e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 16:01:31,242 (trainer:732) INFO: 33epoch:train:2301-2400batch: iter_time=1.127e-04, forward_time=0.144, loss_ctc=62.133, loss_att=47.015, acc=0.702, loss=51.550, backward_time=1.024, grad_norm=94.158, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.326e-05, train_time=2.706
+[gpub007:0/64] 2023-07-10 16:03:46,902 (trainer:732) INFO: 33epoch:train:2401-2500batch: iter_time=1.129e-04, forward_time=0.145, loss_ctc=66.650, loss_att=49.489, acc=0.698, loss=54.637, backward_time=1.027, grad_norm=126.734, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.325e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 16:03:48,239 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub007:0/64] 2023-07-10 16:04:06,299 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 16:04:09,786 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 16:04:09,786 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub007:0/64] 2023-07-10 16:04:09,792 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 16:09:43,696 (trainer:732) INFO: 33epoch:train:2501-2600batch: iter_time=1.210, forward_time=0.163, loss_ctc=79.428, loss_att=58.300, acc=0.691, loss=64.638, backward_time=1.042, grad_norm=124.156, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.184, optim0_lr0=6.324e-05, train_time=7.136
+[gpub007:0/64] 2023-07-10 16:11:59,861 (trainer:732) INFO: 33epoch:train:2601-2700batch: iter_time=1.207e-04, forward_time=0.146, loss_ctc=75.844, loss_att=56.845, acc=0.689, loss=62.544, backward_time=1.029, grad_norm=132.914, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.323e-05, train_time=2.723
+[gpub007:0/64] 2023-07-10 16:14:15,858 (trainer:732) INFO: 33epoch:train:2701-2800batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=69.542, loss_att=52.712, acc=0.711, loss=57.761, backward_time=1.030, grad_norm=114.777, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.322e-05, train_time=2.720
+[gpub007:0/64] 2023-07-10 16:16:31,778 (trainer:732) INFO: 33epoch:train:2801-2900batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=73.852, loss_att=52.859, acc=0.729, loss=59.157, backward_time=1.029, grad_norm=115.952, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.321e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 16:18:47,472 (trainer:732) INFO: 33epoch:train:2901-3000batch: iter_time=1.213e-04, forward_time=0.146, loss_ctc=68.655, loss_att=48.703, acc=0.723, loss=54.688, backward_time=1.027, grad_norm=102.081, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.320e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 16:21:04,634 (trainer:732) INFO: 33epoch:train:3001-3100batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=71.203, loss_att=55.407, acc=0.714, loss=60.146, backward_time=1.028, grad_norm=99.705, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.319e-05, train_time=2.743
+[gpub007:0/64] 2023-07-10 16:23:45,327 (trainer:732) INFO: 33epoch:train:3101-3200batch: iter_time=7.377e-04, forward_time=0.195, loss_ctc=69.408, loss_att=48.348, acc=0.724, loss=54.666, backward_time=1.060, grad_norm=97.142, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.188, optim0_lr0=6.318e-05, train_time=3.214
+[gpub007:0/64] 2023-07-10 16:26:03,246 (trainer:732) INFO: 33epoch:train:3201-3300batch: iter_time=0.005, forward_time=0.152, loss_ctc=58.701, loss_att=45.362, acc=0.703, loss=49.364, backward_time=1.033, grad_norm=90.793, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.317e-05, train_time=2.758
+[gpub007:0/64] 2023-07-10 16:27:04,564 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub007:0/64] 2023-07-10 16:27:22,892 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 16:27:26,375 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 16:27:26,375 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub007:0/64] 2023-07-10 16:27:26,381 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 16:34:16,021 (trainer:732) INFO: 33epoch:train:3301-3400batch: iter_time=1.970, forward_time=0.145, loss_ctc=76.741, loss_att=58.406, acc=0.688, loss=63.907, backward_time=1.041, grad_norm=116.933, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.316e-05, train_time=9.855
+[gpub007:0/64] 2023-07-10 16:36:32,136 (trainer:732) INFO: 33epoch:train:3401-3500batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=77.940, loss_att=55.696, acc=0.688, loss=62.369, backward_time=1.028, grad_norm=121.412, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.315e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 16:38:47,675 (trainer:732) INFO: 33epoch:train:3501-3600batch: iter_time=1.211e-04, forward_time=0.145, loss_ctc=68.014, loss_att=54.887, acc=0.697, loss=58.825, backward_time=1.026, grad_norm=120.682, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.314e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 16:41:03,377 (trainer:732) INFO: 33epoch:train:3601-3700batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=72.287, loss_att=54.805, acc=0.716, loss=60.049, backward_time=1.028, grad_norm=114.066, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.313e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 16:43:18,735 (trainer:732) INFO: 33epoch:train:3701-3800batch: iter_time=1.286e-04, forward_time=0.145, loss_ctc=64.499, loss_att=43.830, acc=0.721, loss=50.031, backward_time=1.026, grad_norm=113.227, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.312e-05, train_time=2.707
+[gpub007:0/64] 2023-07-10 16:45:34,412 (trainer:732) INFO: 33epoch:train:3801-3900batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=73.673, loss_att=55.965, acc=0.715, loss=61.277, backward_time=1.028, grad_norm=117.572, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.311e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 16:47:49,943 (trainer:732) INFO: 33epoch:train:3901-4000batch: iter_time=1.230e-04, forward_time=0.146, loss_ctc=71.386, loss_att=51.773, acc=0.712, loss=57.657, backward_time=1.026, grad_norm=118.908, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.310e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 16:50:05,403 (trainer:732) INFO: 33epoch:train:4001-4100batch: iter_time=1.465e-04, forward_time=0.145, loss_ctc=61.354, loss_att=45.518, acc=0.709, loss=50.269, backward_time=1.025, grad_norm=105.199, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.309e-05, train_time=2.709
+[gpub007:0/64] 2023-07-10 16:51:36,538 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub007:0/64] 2023-07-10 16:51:54,729 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 16:51:58,191 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 16:51:58,191 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-10 16:51:58,198 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 16:56:17,698 (trainer:732) INFO: 33epoch:train:4101-4200batch: iter_time=1.214, forward_time=0.149, loss_ctc=70.786, loss_att=56.826, acc=0.687, loss=61.014, backward_time=1.045, grad_norm=103.303, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.308e-05, train_time=7.445
+[gpub007:0/64] 2023-07-10 16:58:35,498 (trainer:732) INFO: 33epoch:train:4201-4300batch: iter_time=1.257e-04, forward_time=0.148, loss_ctc=81.391, loss_att=56.493, acc=0.690, loss=63.963, backward_time=1.034, grad_norm=138.704, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.307e-05, train_time=2.756
+[gpub007:0/64] 2023-07-10 17:00:51,315 (trainer:732) INFO: 33epoch:train:4301-4400batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.151, loss_att=54.165, acc=0.692, loss=58.661, backward_time=1.028, grad_norm=108.222, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.306e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 17:03:06,996 (trainer:732) INFO: 33epoch:train:4401-4500batch: iter_time=1.135e-04, forward_time=0.146, loss_ctc=67.175, loss_att=50.140, acc=0.714, loss=55.251, backward_time=1.028, grad_norm=115.769, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.305e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 17:05:22,364 (trainer:732) INFO: 33epoch:train:4501-4600batch: iter_time=1.324e-04, forward_time=0.145, loss_ctc=69.180, loss_att=49.560, acc=0.726, loss=55.446, backward_time=1.026, grad_norm=123.860, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.304e-05, train_time=2.707
+[gpub007:0/64] 2023-07-10 17:07:38,144 (trainer:732) INFO: 33epoch:train:4601-4700batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=75.561, loss_att=57.210, acc=0.707, loss=62.715, backward_time=1.029, grad_norm=115.998, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.303e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 17:09:53,850 (trainer:732) INFO: 33epoch:train:4701-4800batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=64.681, loss_att=46.224, acc=0.717, loss=51.761, backward_time=1.029, grad_norm=104.308, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.302e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 17:12:09,364 (trainer:732) INFO: 33epoch:train:4801-4900batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=69.414, loss_att=50.143, acc=0.713, loss=55.924, backward_time=1.027, grad_norm=100.383, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.301e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 17:14:28,978 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub007:0/64] 2023-07-10 17:14:47,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 17:14:50,804 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 17:14:50,805 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub007:0/64] 2023-07-10 17:14:50,814 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 17:19:38,238 (trainer:732) INFO: 33epoch:train:4901-5000batch: iter_time=1.633, forward_time=0.175, loss_ctc=62.916, loss_att=49.597, acc=0.690, loss=53.593, backward_time=1.033, grad_norm=107.175, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.183, optim0_lr0=6.300e-05, train_time=8.977
+[gpub007:0/64] 2023-07-10 17:22:00,097 (trainer:732) INFO: 33epoch:train:5001-5100batch: iter_time=1.087e-04, forward_time=0.146, loss_ctc=78.968, loss_att=56.740, acc=0.695, loss=63.408, backward_time=1.052, grad_norm=114.389, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.299e-05, train_time=2.837
+[gpub007:0/64] 2023-07-10 17:24:22,818 (trainer:732) INFO: 33epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.145, loss_ctc=73.361, loss_att=56.222, acc=0.682, loss=61.364, backward_time=1.037, grad_norm=142.420, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.298e-05, train_time=2.854
+[gpub007:0/64] 2023-07-10 17:26:40,403 (trainer:732) INFO: 33epoch:train:5201-5300batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=70.413, loss_att=53.668, acc=0.705, loss=58.692, backward_time=1.030, grad_norm=106.880, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.297e-05, train_time=2.751
+[gpub007:0/64] 2023-07-10 17:28:56,798 (trainer:732) INFO: 33epoch:train:5301-5400batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=72.574, loss_att=53.666, acc=0.725, loss=59.338, backward_time=1.027, grad_norm=112.195, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.296e-05, train_time=2.728
+[gpub007:0/64] 2023-07-10 17:31:16,771 (trainer:732) INFO: 33epoch:train:5401-5500batch: iter_time=1.354e-04, forward_time=0.146, loss_ctc=68.498, loss_att=47.891, acc=0.720, loss=54.073, backward_time=1.030, grad_norm=108.496, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.295e-05, train_time=2.799
+[gpub007:0/64] 2023-07-10 17:33:33,130 (trainer:732) INFO: 33epoch:train:5501-5600batch: iter_time=1.450e-04, forward_time=0.147, loss_ctc=71.045, loss_att=55.443, acc=0.713, loss=60.123, backward_time=1.029, grad_norm=115.324, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.294e-05, train_time=2.727
+[gpub007:0/64] 2023-07-10 17:35:48,684 (trainer:732) INFO: 33epoch:train:5601-5700batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=68.551, loss_att=47.933, acc=0.717, loss=54.118, backward_time=1.027, grad_norm=110.816, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.293e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 17:38:04,039 (trainer:732) INFO: 33epoch:train:5701-5800batch: iter_time=1.287e-04, forward_time=0.144, loss_ctc=59.922, loss_att=43.721, acc=0.700, loss=48.581, backward_time=1.026, grad_norm=93.916, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.292e-05, train_time=2.707
+[gpub007:0/64] 2023-07-10 17:38:50,122 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub007:0/64] 2023-07-10 17:39:08,178 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 17:39:11,617 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 17:39:11,617 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub007:0/64] 2023-07-10 17:39:11,623 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 17:44:00,936 (trainer:732) INFO: 33epoch:train:5801-5900batch: iter_time=1.385, forward_time=0.145, loss_ctc=74.081, loss_att=55.293, acc=0.693, loss=60.929, backward_time=1.044, grad_norm=102.943, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.291e-05, train_time=7.138
+[gpub007:0/64] 2023-07-10 17:46:17,528 (trainer:732) INFO: 33epoch:train:5901-6000batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=73.307, loss_att=55.392, acc=0.695, loss=60.767, backward_time=1.029, grad_norm=132.164, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.290e-05, train_time=2.732
+[gpub007:0/64] 2023-07-10 17:48:33,613 (trainer:732) INFO: 33epoch:train:6001-6100batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=69.019, loss_att=52.685, acc=0.712, loss=57.585, backward_time=1.028, grad_norm=117.027, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.289e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 17:50:49,583 (trainer:732) INFO: 33epoch:train:6101-6200batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=72.669, loss_att=50.812, acc=0.733, loss=57.369, backward_time=1.029, grad_norm=111.041, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.288e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 17:53:05,443 (trainer:732) INFO: 33epoch:train:6201-6300batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=66.953, loss_att=47.202, acc=0.727, loss=53.127, backward_time=1.028, grad_norm=100.819, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.287e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 17:55:21,553 (trainer:732) INFO: 33epoch:train:6301-6400batch: iter_time=1.220e-04, forward_time=0.147, loss_ctc=70.438, loss_att=55.279, acc=0.711, loss=59.827, backward_time=1.032, grad_norm=106.207, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.286e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 17:57:37,314 (trainer:732) INFO: 33epoch:train:6401-6500batch: iter_time=1.237e-04, forward_time=0.148, loss_ctc=68.695, loss_att=48.236, acc=0.721, loss=54.373, backward_time=1.029, grad_norm=102.571, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.183, optim0_lr0=6.285e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 17:59:52,886 (trainer:732) INFO: 33epoch:train:6501-6600batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=57.891, loss_att=45.251, acc=0.700, loss=49.043, backward_time=1.027, grad_norm=111.736, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.183, optim0_lr0=6.284e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 18:01:23,929 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub007:0/64] 2023-07-10 18:01:42,172 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 18:01:45,590 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 18:01:45,590 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub007:0/64] 2023-07-10 18:01:45,597 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 18:06:35,698 (trainer:732) INFO: 33epoch:train:6601-6700batch: iter_time=1.223, forward_time=0.147, loss_ctc=71.258, loss_att=50.197, acc=0.710, loss=56.515, backward_time=1.044, grad_norm=111.125, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.283e-05, train_time=8.056
+[gpub007:0/64] 2023-07-10 18:08:52,452 (trainer:732) INFO: 33epoch:train:6701-6800batch: iter_time=1.332e-04, forward_time=0.148, loss_ctc=78.114, loss_att=54.449, acc=0.690, loss=61.548, backward_time=1.032, grad_norm=151.267, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.282e-05, train_time=2.735
+[gpub007:0/64] 2023-07-10 18:11:09,090 (trainer:732) INFO: 33epoch:train:6801-6900batch: iter_time=1.245e-04, forward_time=0.148, loss_ctc=67.217, loss_att=54.946, acc=0.697, loss=58.627, backward_time=1.031, grad_norm=124.407, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.281e-05, train_time=2.733
+[gpub007:0/64] 2023-07-10 18:13:25,167 (trainer:732) INFO: 33epoch:train:6901-7000batch: iter_time=1.224e-04, forward_time=0.147, loss_ctc=71.830, loss_att=53.603, acc=0.720, loss=59.071, backward_time=1.030, grad_norm=110.919, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.280e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 18:15:41,008 (trainer:732) INFO: 33epoch:train:7001-7100batch: iter_time=1.280e-04, forward_time=0.147, loss_ctc=63.613, loss_att=42.886, acc=0.726, loss=49.104, backward_time=1.028, grad_norm=90.472, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.279e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 18:17:57,104 (trainer:732) INFO: 33epoch:train:7101-7200batch: iter_time=1.218e-04, forward_time=0.148, loss_ctc=71.112, loss_att=53.614, acc=0.720, loss=58.863, backward_time=1.030, grad_norm=111.220, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.278e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 18:20:12,800 (trainer:732) INFO: 33epoch:train:7201-7300batch: iter_time=1.190e-04, forward_time=0.147, loss_ctc=73.507, loss_att=52.795, acc=0.710, loss=59.009, backward_time=1.027, grad_norm=108.732, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.277e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 18:22:28,504 (trainer:732) INFO: 33epoch:train:7301-7400batch: iter_time=1.163e-04, forward_time=0.148, loss_ctc=61.004, loss_att=45.528, acc=0.709, loss=50.171, backward_time=1.028, grad_norm=111.613, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.276e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 18:24:44,144 (trainer:732) INFO: 33epoch:train:7401-7500batch: iter_time=1.112e-04, forward_time=0.147, loss_ctc=66.357, loss_att=49.549, acc=0.698, loss=54.591, backward_time=1.027, grad_norm=99.908, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.275e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 18:24:45,506 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub007:0/64] 2023-07-10 18:25:03,650 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 18:25:07,132 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 18:25:07,132 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub007:0/64] 2023-07-10 18:25:07,138 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 18:30:49,786 (trainer:732) INFO: 33epoch:train:7501-7600batch: iter_time=1.256, forward_time=0.167, loss_ctc=79.656, loss_att=56.412, acc=0.699, loss=63.385, backward_time=1.051, grad_norm=132.609, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.183, optim0_lr0=6.274e-05, train_time=7.312
+[gpub007:0/64] 2023-07-10 18:33:07,622 (trainer:732) INFO: 33epoch:train:7601-7700batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=67.726, loss_att=53.320, acc=0.699, loss=57.642, backward_time=1.029, grad_norm=129.357, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.273e-05, train_time=2.757
+[gpub007:0/64] 2023-07-10 18:35:23,690 (trainer:732) INFO: 33epoch:train:7701-7800batch: iter_time=1.159e-04, forward_time=0.147, loss_ctc=65.772, loss_att=48.486, acc=0.726, loss=53.672, backward_time=1.029, grad_norm=136.827, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.272e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 18:37:39,934 (trainer:732) INFO: 33epoch:train:7801-7900batch: iter_time=1.207e-04, forward_time=0.148, loss_ctc=68.408, loss_att=47.382, acc=0.741, loss=53.689, backward_time=1.030, grad_norm=123.867, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.271e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 18:39:56,176 (trainer:732) INFO: 33epoch:train:7901-8000batch: iter_time=1.286e-04, forward_time=0.148, loss_ctc=73.520, loss_att=55.555, acc=0.724, loss=60.945, backward_time=1.031, grad_norm=106.924, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.270e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 18:42:11,859 (trainer:732) INFO: 33epoch:train:8001-8100batch: iter_time=1.205e-04, forward_time=0.146, loss_ctc=65.020, loss_att=48.260, acc=0.716, loss=53.288, backward_time=1.028, grad_norm=104.832, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.269e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 18:44:27,902 (trainer:732) INFO: 33epoch:train:8101-8200batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=69.898, loss_att=50.544, acc=0.716, loss=56.350, backward_time=1.028, grad_norm=107.278, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.268e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 18:46:43,665 (trainer:732) INFO: 33epoch:train:8201-8300batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=60.715, loss_att=46.249, acc=0.711, loss=50.589, backward_time=1.028, grad_norm=91.522, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.267e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 18:47:44,425 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub007:0/64] 2023-07-10 18:48:02,552 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 18:48:05,980 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 18:48:05,980 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub007:0/64] 2023-07-10 18:48:05,987 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 18:52:39,876 (trainer:732) INFO: 33epoch:train:8301-8400batch: iter_time=1.729, forward_time=0.184, loss_ctc=72.966, loss_att=53.592, acc=0.703, loss=59.404, backward_time=1.042, grad_norm=105.774, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.186, optim0_lr0=6.266e-05, train_time=7.123
+[gpub007:0/64] 2023-07-10 18:54:56,849 (trainer:732) INFO: 33epoch:train:8401-8500batch: iter_time=1.531e-04, forward_time=0.147, loss_ctc=75.500, loss_att=54.748, acc=0.691, loss=60.974, backward_time=1.030, grad_norm=123.647, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.265e-05, train_time=2.740
+[gpub007:0/64] 2023-07-10 18:57:13,532 (trainer:732) INFO: 33epoch:train:8501-8600batch: iter_time=1.360e-04, forward_time=0.147, loss_ctc=66.478, loss_att=54.282, acc=0.700, loss=57.941, backward_time=1.030, grad_norm=118.749, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.264e-05, train_time=2.733
+[gpub007:0/64] 2023-07-10 18:59:29,475 (trainer:732) INFO: 33epoch:train:8601-8700batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=71.725, loss_att=53.649, acc=0.719, loss=59.072, backward_time=1.028, grad_norm=197.736, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.263e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 19:01:45,336 (trainer:732) INFO: 33epoch:train:8701-8800batch: iter_time=1.342e-04, forward_time=0.147, loss_ctc=63.025, loss_att=43.130, acc=0.727, loss=49.098, backward_time=1.028, grad_norm=103.614, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.262e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 19:04:01,102 (trainer:732) INFO: 33epoch:train:8801-8900batch: iter_time=1.323e-04, forward_time=0.146, loss_ctc=71.275, loss_att=54.296, acc=0.716, loss=59.390, backward_time=1.027, grad_norm=114.658, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.261e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 19:06:21,567 (trainer:732) INFO: 33epoch:train:8901-9000batch: iter_time=1.338e-04, forward_time=0.145, loss_ctc=70.594, loss_att=51.243, acc=0.714, loss=57.048, backward_time=1.039, grad_norm=110.215, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.260e-05, train_time=2.809
+[gpub007:0/64] 2023-07-10 19:08:37,159 (trainer:732) INFO: 33epoch:train:9001-9100batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=60.715, loss_att=45.124, acc=0.711, loss=49.801, backward_time=1.027, grad_norm=89.618, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.259e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 19:10:14,028 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub007:0/64] 2023-07-10 19:10:32,057 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 19:10:35,839 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 19:10:35,839 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub007:0/64] 2023-07-10 19:10:35,845 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 19:13:48,794 (trainer:732) INFO: 33epoch:train:9101-9200batch: iter_time=1.681, forward_time=0.162, loss_ctc=70.682, loss_att=57.077, acc=0.693, loss=61.159, backward_time=1.040, grad_norm=102.706, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.258e-05, train_time=6.232
+[gpub007:0/64] 2023-07-10 19:16:04,921 (trainer:732) INFO: 33epoch:train:9201-9300batch: iter_time=1.177e-04, forward_time=0.144, loss_ctc=77.583, loss_att=53.562, acc=0.704, loss=60.768, backward_time=1.029, grad_norm=123.747, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.181, optim0_lr0=6.257e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 19:18:21,240 (trainer:732) INFO: 33epoch:train:9301-9400batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=67.520, loss_att=53.688, acc=0.701, loss=57.838, backward_time=1.028, grad_norm=107.774, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.181, optim0_lr0=6.256e-05, train_time=2.726
+[gpub007:0/64] 2023-07-10 19:20:37,860 (trainer:732) INFO: 33epoch:train:9401-9500batch: iter_time=1.226e-04, forward_time=0.147, loss_ctc=66.978, loss_att=49.402, acc=0.725, loss=54.675, backward_time=1.030, grad_norm=109.628, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.255e-05, train_time=2.732
+[gpub007:0/64] 2023-07-10 19:22:53,987 (trainer:732) INFO: 33epoch:train:9501-9600batch: iter_time=1.187e-04, forward_time=0.145, loss_ctc=68.305, loss_att=47.647, acc=0.738, loss=53.844, backward_time=1.027, grad_norm=109.822, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.254e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 19:25:10,253 (trainer:732) INFO: 33epoch:train:9601-9700batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=73.905, loss_att=55.968, acc=0.726, loss=61.349, backward_time=1.030, grad_norm=108.971, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.253e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 19:27:25,776 (trainer:732) INFO: 33epoch:train:9701-9800batch: iter_time=1.187e-04, forward_time=0.145, loss_ctc=63.140, loss_att=45.901, acc=0.717, loss=51.072, backward_time=1.026, grad_norm=104.687, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.252e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 19:29:41,827 (trainer:732) INFO: 33epoch:train:9801-9900batch: iter_time=1.271e-04, forward_time=0.147, loss_ctc=69.084, loss_att=49.034, acc=0.722, loss=55.049, backward_time=1.031, grad_norm=117.631, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.251e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 19:31:57,752 (trainer:732) INFO: 33epoch:train:9901-10000batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=62.702, loss_att=47.431, acc=0.711, loss=52.012, backward_time=1.030, grad_norm=129.784, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.250e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 19:47:05,111 (trainer:338) INFO: 33epoch results: [train] iter_time=0.193, forward_time=0.148, loss_ctc=70.146, loss_att=51.879, acc=0.709, loss=57.359, backward_time=1.031, grad_norm=114.398, clip=100.000, loss_scale=2.575e+28, optim_step_time=0.182, optim0_lr0=6.300e-05, train_time=3.329, time=4 hours, 37 minutes and 49.73 seconds, total_count=300000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=48.935, cer_ctc=0.271, loss_att=47.287, acc=0.649, cer=0.446, wer=1.000, loss=47.781, time=8 minutes and 37.42 seconds, total_count=30866, gpu_max_cached_mem_GB=37.219, [att_plot] time=6 minutes and 7.12 seconds, total_count=0, gpu_max_cached_mem_GB=37.219
+[gpub007:0/64] 2023-07-10 19:47:20,758 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub007:0/64] 2023-07-10 19:47:20,766 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/28epoch.pth
+[gpub007:0/64] 2023-07-10 19:47:20,766 (trainer:272) INFO: 34/50epoch started. Estimated time to finish: 3 days, 10 hours and 22 minutes
+[gpub007:0/64] 2023-07-10 19:47:20,770 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub007:0/64] 2023-07-10 19:47:39,573 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 19:47:43,243 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 19:47:43,243 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-10 19:47:43,274 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 19:59:42,068 (trainer:732) INFO: 34epoch:train:1-100batch: iter_time=5.956, forward_time=0.201, loss_ctc=69.349, loss_att=48.012, acc=0.699, loss=54.413, backward_time=1.042, grad_norm=102.808, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.185, optim0_lr0=6.249e-05, train_time=14.826 +[gpub007:0/64] 2023-07-10 20:02:00,522 (trainer:732) INFO: 34epoch:train:101-200batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=71.446, loss_att=53.919, acc=0.703, loss=59.177, backward_time=1.028, grad_norm=114.778, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.249e-05, train_time=2.769 +[gpub007:0/64] 2023-07-10 20:04:17,015 (trainer:732) INFO: 34epoch:train:201-300batch: iter_time=1.355e-04, forward_time=0.145, loss_ctc=61.524, loss_att=51.532, acc=0.691, loss=54.529, backward_time=1.025, grad_norm=102.625, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.248e-05, train_time=2.730 +[gpub007:0/64] 2023-07-10 20:06:33,815 (trainer:732) INFO: 34epoch:train:301-400batch: iter_time=1.484e-04, forward_time=0.145, loss_ctc=66.318, loss_att=50.030, acc=0.695, loss=54.917, backward_time=1.025, grad_norm=111.002, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.247e-05, train_time=2.736 +[gpub007:0/64] 2023-07-10 20:08:49,770 (trainer:732) INFO: 34epoch:train:401-500batch: iter_time=1.378e-04, forward_time=0.146, loss_ctc=81.394, loss_att=64.657, acc=0.684, loss=69.678, backward_time=1.028, grad_norm=119.604, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.246e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 20:11:08,435 (trainer:732) INFO: 34epoch:train:501-600batch: iter_time=1.356e-04, forward_time=0.145, loss_ctc=80.323, loss_att=56.481, acc=0.697, loss=63.633, backward_time=1.028, grad_norm=109.089, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.245e-05, train_time=2.773 +[gpub007:0/64] 2023-07-10 20:13:24,190 (trainer:732) INFO: 34epoch:train:601-700batch: iter_time=1.325e-04, forward_time=0.146, loss_ctc=68.599, loss_att=53.053, acc=0.711, loss=57.717, backward_time=1.029, grad_norm=143.225, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.244e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 20:15:40,219 (trainer:732) INFO: 34epoch:train:701-800batch: iter_time=1.249e-04, forward_time=0.145, loss_ctc=79.178, loss_att=58.364, acc=0.700, loss=64.608, backward_time=1.027, grad_norm=111.141, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.243e-05, 
train_time=2.720 +[gpub007:0/64] 2023-07-10 20:16:34,521 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-10 20:16:52,203 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 20:16:55,576 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 20:16:55,576 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-10 20:16:55,582 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 20:21:03,836 (trainer:732) INFO: 34epoch:train:801-900batch: iter_time=1.287, forward_time=0.147, loss_ctc=70.884, loss_att=52.866, acc=0.696, loss=58.272, backward_time=1.046, grad_norm=119.164, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.242e-05, train_time=6.472 +[gpub007:0/64] 2023-07-10 20:23:20,205 (trainer:732) INFO: 34epoch:train:901-1000batch: iter_time=1.497e-04, forward_time=0.147, loss_ctc=71.782, loss_att=53.483, acc=0.707, loss=58.973, backward_time=1.030, grad_norm=105.750, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.241e-05, train_time=2.727 +[gpub007:0/64] 2023-07-10 20:25:35,958 (trainer:732) INFO: 34epoch:train:1001-1100batch: iter_time=1.359e-04, forward_time=0.146, loss_ctc=61.281, loss_att=49.246, acc=0.710, loss=52.857, backward_time=1.027, grad_norm=103.242, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.240e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 20:27:52,052 (trainer:732) INFO: 34epoch:train:1101-1200batch: iter_time=1.330e-04, forward_time=0.145, loss_ctc=65.369, loss_att=47.573, acc=0.708, loss=52.912, backward_time=1.027, grad_norm=105.568, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.239e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 20:30:08,295 (trainer:732) INFO: 34epoch:train:1201-1300batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=80.498, loss_att=63.017, acc=0.694, loss=68.261, backward_time=1.031, grad_norm=133.697, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.238e-05, train_time=2.725 +[gpub007:0/64] 2023-07-10 20:32:24,060 (trainer:732) INFO: 34epoch:train:1301-1400batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=77.530, loss_att=56.703, acc=0.706, loss=62.951, backward_time=1.029, grad_norm=124.538, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.237e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 20:34:39,939 (trainer:732) INFO: 34epoch:train:1401-1500batch: iter_time=1.059e-04, forward_time=0.146, loss_ctc=68.272, loss_att=51.475, acc=0.721, loss=56.514, backward_time=1.029, grad_norm=114.620, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.236e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 20:36:55,655 (trainer:732) INFO: 34epoch:train:1501-1600batch: iter_time=1.094e-04, forward_time=0.144, loss_ctc=78.170, loss_att=58.606, acc=0.708, 
loss=64.475, backward_time=1.028, grad_norm=125.969, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.235e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 20:38:27,808 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-10 20:38:45,905 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 20:38:49,379 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 20:38:49,379 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-10 20:38:49,385 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 20:41:45,529 (trainer:732) INFO: 34epoch:train:1601-1700batch: iter_time=1.215, forward_time=0.145, loss_ctc=65.763, loss_att=49.462, acc=0.716, loss=54.353, backward_time=1.041, grad_norm=114.498, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.234e-05, train_time=5.797 +[gpub007:0/64] 2023-07-10 20:44:01,518 (trainer:732) INFO: 34epoch:train:1701-1800batch: iter_time=1.185e-04, forward_time=0.146, loss_ctc=67.985, loss_att=51.996, acc=0.693, loss=56.793, backward_time=1.029, grad_norm=123.291, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.233e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 20:46:17,392 (trainer:732) INFO: 34epoch:train:1801-1900batch: iter_time=1.154e-04, forward_time=0.146, loss_ctc=70.309, loss_att=53.677, acc=0.702, loss=58.666, backward_time=1.029, grad_norm=107.144, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.232e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 20:48:32,821 (trainer:732) INFO: 34epoch:train:1901-2000batch: iter_time=1.295e-04, forward_time=0.146, loss_ctc=60.184, loss_att=47.079, acc=0.705, loss=51.011, backward_time=1.025, grad_norm=109.060, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.231e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 20:50:48,321 (trainer:732) INFO: 34epoch:train:2001-2100batch: iter_time=1.190e-04, forward_time=0.146, loss_ctc=72.203, loss_att=50.001, acc=0.703, loss=56.661, backward_time=1.027, grad_norm=105.082, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.230e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 20:53:04,041 (trainer:732) INFO: 34epoch:train:2101-2200batch: iter_time=1.280e-04, forward_time=0.145, loss_ctc=72.034, loss_att=57.310, acc=0.694, loss=61.727, backward_time=1.028, grad_norm=129.261, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.229e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 20:55:19,665 (trainer:732) INFO: 34epoch:train:2201-2300batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=79.750, loss_att=58.507, acc=0.697, loss=64.880, backward_time=1.027, grad_norm=112.420, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.228e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 20:57:35,180 
(trainer:732) INFO: 34epoch:train:2301-2400batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=66.119, loss_att=49.079, acc=0.718, loss=54.191, backward_time=1.028, grad_norm=99.625, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.227e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 20:59:50,806 (trainer:732) INFO: 34epoch:train:2401-2500batch: iter_time=1.231e-04, forward_time=0.145, loss_ctc=78.588, loss_att=61.308, acc=0.703, loss=66.492, backward_time=1.026, grad_norm=110.532, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.226e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 20:59:52,151 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-10 21:00:10,154 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 21:00:13,596 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 21:00:13,596 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-10 21:00:13,602 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 21:06:20,656 (trainer:732) INFO: 34epoch:train:2501-2600batch: iter_time=1.226, forward_time=0.205, loss_ctc=67.687, loss_att=47.252, acc=0.712, loss=53.382, backward_time=1.046, grad_norm=104.319, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.185, optim0_lr0=6.225e-05, train_time=7.797 +[gpub007:0/64] 2023-07-10 21:08:37,221 (trainer:732) INFO: 34epoch:train:2601-2700batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=71.634, loss_att=53.817, acc=0.710, loss=59.162, backward_time=1.031, grad_norm=126.412, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.224e-05, train_time=2.731 +[gpub007:0/64] 2023-07-10 21:10:53,003 (trainer:732) INFO: 34epoch:train:2701-2800batch: iter_time=1.325e-04, forward_time=0.146, loss_ctc=60.465, loss_att=47.102, acc=0.719, loss=51.111, backward_time=1.028, grad_norm=96.180, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.223e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 21:13:08,707 (trainer:732) INFO: 34epoch:train:2801-2900batch: iter_time=1.283e-04, forward_time=0.146, loss_ctc=65.345, loss_att=47.927, acc=0.710, loss=53.152, backward_time=1.027, grad_norm=131.391, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.222e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 21:15:24,669 (trainer:732) INFO: 34epoch:train:2901-3000batch: iter_time=1.231e-04, forward_time=0.144, loss_ctc=78.791, loss_att=61.741, acc=0.699, loss=66.856, backward_time=1.029, grad_norm=121.681, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.181, optim0_lr0=6.221e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 21:17:40,573 (trainer:732) INFO: 34epoch:train:3001-3100batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=75.539, loss_att=55.371, acc=0.713, loss=61.422, backward_time=1.028, grad_norm=128.618, clip=100.000, 
loss_scale=7.923e+28, optim_step_time=0.181, optim0_lr0=6.220e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 21:19:56,364 (trainer:732) INFO: 34epoch:train:3101-3200batch: iter_time=1.306e-04, forward_time=0.144, loss_ctc=68.736, loss_att=51.547, acc=0.723, loss=56.704, backward_time=1.028, grad_norm=107.194, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.181, optim0_lr0=6.219e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 21:22:12,480 (trainer:732) INFO: 34epoch:train:3201-3300batch: iter_time=1.340e-04, forward_time=0.146, loss_ctc=78.672, loss_att=57.227, acc=0.714, loss=63.660, backward_time=1.030, grad_norm=115.871, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.218e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 21:23:09,921 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub007:0/64] 2023-07-10 21:23:28,147 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 21:23:31,586 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 21:23:31,586 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-10 21:23:31,601 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 21:29:15,023 (trainer:732) INFO: 34epoch:train:3301-3400batch: iter_time=2.754, forward_time=0.146, loss_ctc=67.965, loss_att=49.802, acc=0.713, loss=55.251, backward_time=1.043, grad_norm=95.507, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.218e-05, train_time=8.451 +[gpub007:0/64] 2023-07-10 21:31:34,354 (trainer:732) INFO: 34epoch:train:3401-3500batch: iter_time=1.313e-04, forward_time=0.146, loss_ctc=65.794, loss_att=48.214, acc=0.719, loss=53.488, backward_time=1.032, grad_norm=100.118, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.217e-05, train_time=2.786 +[gpub007:0/64] 2023-07-10 21:33:49,883 (trainer:732) INFO: 34epoch:train:3501-3600batch: iter_time=1.592e-04, forward_time=0.146, loss_ctc=64.599, loss_att=50.354, acc=0.701, loss=54.628, backward_time=1.025, grad_norm=99.074, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.216e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 21:36:05,415 (trainer:732) INFO: 34epoch:train:3601-3700batch: iter_time=1.473e-04, forward_time=0.147, loss_ctc=66.878, loss_att=49.527, acc=0.705, loss=54.733, backward_time=1.026, grad_norm=116.845, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.215e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 21:38:21,051 (trainer:732) INFO: 34epoch:train:3701-3800batch: iter_time=1.548e-04, forward_time=0.146, loss_ctc=71.792, loss_att=56.733, acc=0.691, loss=61.251, backward_time=1.027, grad_norm=114.711, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.214e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 21:40:36,738 (trainer:732) INFO: 34epoch:train:3801-3900batch: iter_time=1.420e-04, 
forward_time=0.146, loss_ctc=80.471, loss_att=58.192, acc=0.699, loss=64.876, backward_time=1.027, grad_norm=127.665, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.213e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 21:42:52,174 (trainer:732) INFO: 34epoch:train:3901-4000batch: iter_time=1.440e-04, forward_time=0.146, loss_ctc=68.267, loss_att=50.941, acc=0.715, loss=56.139, backward_time=1.025, grad_norm=97.636, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.212e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 21:45:07,809 (trainer:732) INFO: 34epoch:train:4001-4100batch: iter_time=1.322e-04, forward_time=0.146, loss_ctc=76.407, loss_att=54.587, acc=0.714, loss=61.133, backward_time=1.028, grad_norm=117.066, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.211e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 21:46:38,902 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-10 21:46:56,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 21:47:00,307 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 21:47:00,307 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-10 21:47:00,313 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 21:50:44,681 (trainer:732) INFO: 34epoch:train:4101-4200batch: iter_time=1.211, forward_time=0.147, loss_ctc=67.382, loss_att=49.627, acc=0.728, loss=54.953, backward_time=1.041, grad_norm=101.570, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.210e-05, train_time=6.737 +[gpub007:0/64] 2023-07-10 21:53:01,360 (trainer:732) INFO: 34epoch:train:4201-4300batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=68.126, loss_att=53.262, acc=0.705, loss=57.721, backward_time=1.030, grad_norm=121.194, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.209e-05, train_time=2.733 +[gpub007:0/64] 2023-07-10 21:55:17,215 (trainer:732) INFO: 34epoch:train:4301-4400batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=68.569, loss_att=50.240, acc=0.715, loss=55.739, backward_time=1.029, grad_norm=104.723, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.208e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 21:57:33,187 (trainer:732) INFO: 34epoch:train:4401-4500batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=60.201, loss_att=45.707, acc=0.722, loss=50.055, backward_time=1.029, grad_norm=116.262, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.207e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 21:59:48,907 (trainer:732) INFO: 34epoch:train:4501-4600batch: iter_time=1.199e-04, forward_time=0.145, loss_ctc=72.214, loss_att=51.469, acc=0.706, loss=57.693, backward_time=1.028, grad_norm=117.058, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.206e-05, 
train_time=2.714 +[gpub007:0/64] 2023-07-10 22:02:05,114 (trainer:732) INFO: 34epoch:train:4601-4700batch: iter_time=1.239e-04, forward_time=0.145, loss_ctc=72.669, loss_att=56.498, acc=0.707, loss=61.349, backward_time=1.030, grad_norm=122.328, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.205e-05, train_time=2.724 +[gpub007:0/64] 2023-07-10 22:04:21,389 (trainer:732) INFO: 34epoch:train:4701-4800batch: iter_time=1.226e-04, forward_time=0.146, loss_ctc=78.784, loss_att=57.694, acc=0.718, loss=64.021, backward_time=1.032, grad_norm=121.739, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.204e-05, train_time=2.725 +[gpub007:0/64] 2023-07-10 22:06:37,178 (trainer:732) INFO: 34epoch:train:4801-4900batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=65.076, loss_att=49.119, acc=0.721, loss=53.906, backward_time=1.029, grad_norm=104.694, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.203e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 22:08:53,142 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-10 22:09:10,979 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 22:09:14,481 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 22:09:14,482 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-10 22:09:14,488 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 22:13:11,552 (trainer:732) INFO: 34epoch:train:4901-5000batch: iter_time=1.224, forward_time=0.146, loss_ctc=77.627, loss_att=59.803, acc=0.717, loss=65.150, backward_time=1.036, grad_norm=109.199, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.202e-05, train_time=7.887 +[gpub007:0/64] 2023-07-10 22:15:29,578 (trainer:732) INFO: 34epoch:train:5001-5100batch: iter_time=1.154e-04, forward_time=0.147, loss_ctc=67.150, loss_att=46.137, acc=0.723, loss=52.441, backward_time=1.038, grad_norm=110.471, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.201e-05, train_time=2.760 +[gpub007:0/64] 2023-07-10 22:17:46,037 (trainer:732) INFO: 34epoch:train:5101-5200batch: iter_time=1.256e-04, forward_time=0.145, loss_ctc=70.295, loss_att=53.173, acc=0.716, loss=58.309, backward_time=1.028, grad_norm=114.633, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.200e-05, train_time=2.729 +[gpub007:0/64] 2023-07-10 22:20:01,627 (trainer:732) INFO: 34epoch:train:5201-5300batch: iter_time=1.311e-04, forward_time=0.145, loss_ctc=59.916, loss_att=46.780, acc=0.720, loss=50.721, backward_time=1.027, grad_norm=106.684, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.199e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 22:22:17,708 (trainer:732) INFO: 34epoch:train:5301-5400batch: iter_time=1.188e-04, forward_time=0.147, loss_ctc=65.392, loss_att=47.487, acc=0.715, 
loss=52.858, backward_time=1.029, grad_norm=84.710, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.198e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 22:24:37,033 (trainer:732) INFO: 34epoch:train:5401-5500batch: iter_time=1.233e-04, forward_time=0.145, loss_ctc=77.678, loss_att=61.020, acc=0.701, loss=66.017, backward_time=1.032, grad_norm=122.241, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.197e-05, train_time=2.786 +[gpub007:0/64] 2023-07-10 22:26:59,290 (trainer:732) INFO: 34epoch:train:5501-5600batch: iter_time=1.178e-04, forward_time=0.146, loss_ctc=74.614, loss_att=53.719, acc=0.718, loss=59.988, backward_time=1.049, grad_norm=118.817, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.196e-05, train_time=2.845 +[gpub007:0/64] 2023-07-10 22:29:29,744 (trainer:732) INFO: 34epoch:train:5601-5700batch: iter_time=1.290e-04, forward_time=0.146, loss_ctc=68.752, loss_att=52.264, acc=0.727, loss=57.210, backward_time=1.043, grad_norm=87.526, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.196e-05, train_time=3.009 +[gpub007:0/64] 2023-07-10 22:31:45,668 (trainer:732) INFO: 34epoch:train:5701-5800batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=75.418, loss_att=56.265, acc=0.719, loss=62.011, backward_time=1.030, grad_norm=141.473, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.195e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 22:32:35,295 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-10 22:32:52,948 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 22:32:56,273 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 22:32:56,273 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-10 22:32:56,288 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 22:38:22,074 (trainer:732) INFO: 34epoch:train:5801-5900batch: iter_time=1.241, forward_time=0.145, loss_ctc=68.901, loss_att=49.472, acc=0.721, loss=55.301, backward_time=1.045, grad_norm=106.864, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.194e-05, train_time=7.928 +[gpub007:0/64] 2023-07-10 22:40:52,202 (trainer:732) INFO: 34epoch:train:5901-6000batch: iter_time=1.214e-04, forward_time=0.154, loss_ctc=65.372, loss_att=48.613, acc=0.722, loss=53.640, backward_time=1.053, grad_norm=102.720, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.193e-05, train_time=3.002 +[gpub007:0/64] 2023-07-10 22:43:19,810 (trainer:732) INFO: 34epoch:train:6001-6100batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=64.195, loss_att=49.224, acc=0.715, loss=53.716, backward_time=1.038, grad_norm=95.561, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.192e-05, train_time=2.952 +[gpub007:0/64] 2023-07-10 22:45:35,490 (trainer:732) 
INFO: 34epoch:train:6101-6200batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=66.535, loss_att=49.513, acc=0.718, loss=54.620, backward_time=1.026, grad_norm=100.886, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.191e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 22:48:03,899 (trainer:732) INFO: 34epoch:train:6201-6300batch: iter_time=1.178e-04, forward_time=0.146, loss_ctc=71.073, loss_att=55.888, acc=0.700, loss=60.443, backward_time=1.055, grad_norm=104.337, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.190e-05, train_time=2.968 +[gpub007:0/64] 2023-07-10 22:50:26,174 (trainer:732) INFO: 34epoch:train:6301-6400batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=76.179, loss_att=56.767, acc=0.712, loss=62.590, backward_time=1.036, grad_norm=121.764, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.189e-05, train_time=2.845 +[gpub007:0/64] 2023-07-10 22:52:42,073 (trainer:732) INFO: 34epoch:train:6401-6500batch: iter_time=1.214e-04, forward_time=0.145, loss_ctc=69.003, loss_att=51.965, acc=0.727, loss=57.076, backward_time=1.028, grad_norm=126.176, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.188e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 22:54:58,126 (trainer:732) INFO: 34epoch:train:6501-6600batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=75.306, loss_att=55.111, acc=0.719, loss=61.170, backward_time=1.030, grad_norm=107.882, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.187e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 22:56:30,823 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-10 22:56:48,984 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 22:56:52,426 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 22:56:52,427 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub007:0/64] 2023-07-10 22:56:52,433 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 23:01:24,170 (trainer:732) INFO: 34epoch:train:6601-6700batch: iter_time=1.295, forward_time=0.191, loss_ctc=67.514, loss_att=50.071, acc=0.726, loss=55.304, backward_time=1.041, grad_norm=91.689, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.185, optim0_lr0=6.186e-05, train_time=7.720 +[gpub007:0/64] 2023-07-10 23:03:41,084 (trainer:732) INFO: 34epoch:train:6701-6800batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=67.060, loss_att=53.087, acc=0.701, loss=57.279, backward_time=1.027, grad_norm=122.027, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.185e-05, train_time=2.738 +[gpub007:0/64] 2023-07-10 23:05:56,990 (trainer:732) INFO: 34epoch:train:6801-6900batch: iter_time=1.420e-04, forward_time=0.146, loss_ctc=68.406, loss_att=50.635, acc=0.706, loss=55.966, backward_time=1.030, grad_norm=95.825, clip=100.000, 
loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.184e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 23:08:12,338 (trainer:732) INFO: 34epoch:train:6901-7000batch: iter_time=1.263e-04, forward_time=0.145, loss_ctc=62.076, loss_att=47.901, acc=0.708, loss=52.153, backward_time=1.025, grad_norm=110.676, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.183e-05, train_time=2.707 +[gpub007:0/64] 2023-07-10 23:10:27,756 (trainer:732) INFO: 34epoch:train:7001-7100batch: iter_time=1.320e-04, forward_time=0.145, loss_ctc=69.502, loss_att=48.780, acc=0.709, loss=54.996, backward_time=1.026, grad_norm=108.550, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.182e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 23:12:43,609 (trainer:732) INFO: 34epoch:train:7101-7200batch: iter_time=1.284e-04, forward_time=0.145, loss_ctc=70.722, loss_att=55.840, acc=0.701, loss=60.305, backward_time=1.028, grad_norm=108.433, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.181e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 23:14:59,137 (trainer:732) INFO: 34epoch:train:7201-7300batch: iter_time=1.316e-04, forward_time=0.144, loss_ctc=79.212, loss_att=59.552, acc=0.697, loss=65.450, backward_time=1.027, grad_norm=140.278, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.180e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 23:17:14,942 (trainer:732) INFO: 34epoch:train:7301-7400batch: iter_time=1.309e-04, forward_time=0.145, loss_ctc=65.601, loss_att=48.519, acc=0.722, loss=53.643, backward_time=1.027, grad_norm=105.796, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.179e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 23:19:42,933 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub007:0/64] 2023-07-10 23:20:00,973 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 23:20:04,452 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 23:20:04,452 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-10 23:20:04,468 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 23:24:16,749 (trainer:732) INFO: 34epoch:train:7401-7500batch: iter_time=2.721, forward_time=0.149, loss_ctc=77.532, loss_att=59.688, acc=0.706, loss=65.041, backward_time=1.035, grad_norm=123.327, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.178e-05, train_time=8.436 +[gpub007:0/64] 2023-07-10 23:26:35,748 (trainer:732) INFO: 34epoch:train:7501-7600batch: iter_time=1.207e-04, forward_time=0.146, loss_ctc=68.347, loss_att=53.228, acc=0.703, loss=57.763, backward_time=1.036, grad_norm=111.743, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.178e-05, train_time=2.780 +[gpub007:0/64] 2023-07-10 23:28:51,816 (trainer:732) INFO: 34epoch:train:7601-7700batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=69.126, loss_att=49.852, acc=0.716, loss=55.635, backward_time=1.029, grad_norm=106.716, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.177e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 23:31:07,805 (trainer:732) INFO: 34epoch:train:7701-7800batch: iter_time=1.161e-04, forward_time=0.147, loss_ctc=60.326, loss_att=46.853, acc=0.717, loss=50.895, backward_time=1.028, grad_norm=122.260, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.176e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 23:33:23,743 (trainer:732) INFO: 34epoch:train:7801-7900batch: iter_time=1.140e-04, forward_time=0.147, loss_ctc=68.286, loss_att=48.469, acc=0.715, loss=54.414, backward_time=1.029, grad_norm=109.396, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.175e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 23:35:39,580 (trainer:732) INFO: 34epoch:train:7901-8000batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=72.779, loss_att=56.400, acc=0.703, loss=61.313, backward_time=1.028, grad_norm=118.306, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.174e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 23:37:55,594 (trainer:732) INFO: 34epoch:train:8001-8100batch: iter_time=1.075e-04, forward_time=0.147, loss_ctc=76.493, loss_att=57.884, acc=0.713, loss=63.467, backward_time=1.029, grad_norm=116.105, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.173e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 23:40:11,471 (trainer:732) INFO: 34epoch:train:8101-8200batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=64.637, loss_att=48.240, acc=0.724, loss=53.159, backward_time=1.029, grad_norm=104.182, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, 
optim0_lr0=6.172e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 23:42:27,255 (trainer:732) INFO: 34epoch:train:8201-8300batch: iter_time=1.230e-04, forward_time=0.147, loss_ctc=77.723, loss_att=60.042, acc=0.712, loss=65.346, backward_time=1.029, grad_norm=111.442, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.171e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 23:43:16,673 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub007:0/64] 2023-07-10 23:43:34,915 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 23:43:38,592 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 23:43:38,592 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-10 23:43:38,599 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 23:48:42,803 (trainer:732) INFO: 34epoch:train:8301-8400batch: iter_time=1.216, forward_time=0.146, loss_ctc=65.185, loss_att=48.408, acc=0.703, loss=53.441, backward_time=1.051, grad_norm=105.433, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.170e-05, train_time=7.511 +[gpub007:0/64] 2023-07-10 23:51:00,088 (trainer:732) INFO: 34epoch:train:8401-8500batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=69.581, loss_att=51.875, acc=0.714, loss=57.186, backward_time=1.030, grad_norm=104.887, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.169e-05, train_time=2.745 +[gpub007:0/64] 2023-07-10 23:53:16,048 (trainer:732) INFO: 34epoch:train:8501-8600batch: iter_time=9.486e-05, forward_time=0.144, loss_ctc=58.548, loss_att=46.792, acc=0.708, loss=50.319, backward_time=1.028, grad_norm=92.087, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.168e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 23:55:31,942 (trainer:732) INFO: 34epoch:train:8601-8700batch: iter_time=9.773e-05, forward_time=0.146, loss_ctc=65.359, loss_att=47.129, acc=0.709, loss=52.598, backward_time=1.028, grad_norm=112.394, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.167e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 23:57:47,828 (trainer:732) INFO: 34epoch:train:8701-8800batch: iter_time=9.795e-05, forward_time=0.145, loss_ctc=76.906, loss_att=60.671, acc=0.693, loss=65.541, backward_time=1.028, grad_norm=115.226, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.166e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 00:00:03,516 (trainer:732) INFO: 34epoch:train:8801-8900batch: iter_time=1.040e-04, forward_time=0.145, loss_ctc=74.389, loss_att=54.275, acc=0.704, loss=60.309, backward_time=1.027, grad_norm=99.943, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.165e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 00:02:19,258 (trainer:732) INFO: 34epoch:train:8901-9000batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=67.789, 
loss_att=51.242, acc=0.721, loss=56.206, backward_time=1.028, grad_norm=99.008, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.164e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 00:04:35,099 (trainer:732) INFO: 34epoch:train:9001-9100batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=74.780, loss_att=54.151, acc=0.714, loss=60.339, backward_time=1.028, grad_norm=105.329, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.163e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 00:06:05,782 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-11 00:06:24,337 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 00:06:27,764 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 00:06:27,764 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-11 00:06:27,770 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 00:09:52,692 (trainer:732) INFO: 34epoch:train:9101-9200batch: iter_time=1.233, forward_time=0.145, loss_ctc=67.539, loss_att=52.675, acc=0.694, loss=57.134, backward_time=1.037, grad_norm=101.310, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.162e-05, train_time=6.352 +[gpub007:0/64] 2023-07-11 00:12:09,529 (trainer:732) INFO: 34epoch:train:9201-9300batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=65.178, loss_att=48.657, acc=0.714, loss=53.613, backward_time=1.028, grad_norm=106.251, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.162e-05, train_time=2.737 +[gpub007:0/64] 2023-07-11 00:14:27,077 (trainer:732) INFO: 34epoch:train:9301-9400batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=64.710, loss_att=49.874, acc=0.699, loss=54.325, backward_time=1.028, grad_norm=107.263, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.161e-05, train_time=2.751 +[gpub007:0/64] 2023-07-11 00:16:48,717 (trainer:732) INFO: 34epoch:train:9401-9500batch: iter_time=1.199e-04, forward_time=0.146, loss_ctc=65.829, loss_att=48.090, acc=0.712, loss=53.411, backward_time=1.050, grad_norm=141.473, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.160e-05, train_time=2.833 +[gpub007:0/64] 2023-07-11 00:19:09,415 (trainer:732) INFO: 34epoch:train:9501-9600batch: iter_time=1.246e-04, forward_time=0.145, loss_ctc=70.534, loss_att=55.635, acc=0.695, loss=60.105, backward_time=1.031, grad_norm=134.685, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.159e-05, train_time=2.814 +[gpub007:0/64] 2023-07-11 00:21:25,823 (trainer:732) INFO: 34epoch:train:9601-9700batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=77.555, loss_att=56.786, acc=0.703, loss=63.017, backward_time=1.027, grad_norm=114.296, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.158e-05, train_time=2.728 +[gpub007:0/64] 
2023-07-11 00:23:49,641 (trainer:732) INFO: 34epoch:train:9701-9800batch: iter_time=1.212e-04, forward_time=0.146, loss_ctc=68.079, loss_att=51.578, acc=0.715, loss=56.528, backward_time=1.040, grad_norm=99.424, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.157e-05, train_time=2.876 +[gpub007:0/64] 2023-07-11 00:26:15,750 (trainer:732) INFO: 34epoch:train:9801-9900batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=77.070, loss_att=55.145, acc=0.712, loss=61.723, backward_time=1.048, grad_norm=113.370, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.156e-05, train_time=2.922 +[gpub007:0/64] 2023-07-11 00:28:31,403 (trainer:732) INFO: 34epoch:train:9901-10000batch: iter_time=1.101e-04, forward_time=0.145, loss_ctc=64.300, loss_att=53.015, acc=0.707, loss=56.400, backward_time=1.028, grad_norm=109.344, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.155e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 00:41:09,414 (trainer:338) INFO: 34epoch results: [train] iter_time=0.226, forward_time=0.147, loss_ctc=70.190, loss_att=52.756, acc=0.709, loss=57.986, backward_time=1.032, grad_norm=111.980, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.202e-05, train_time=3.374, time=4 hours, 41 minutes and 22.59 seconds, total_count=310000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=44.610, cer_ctc=0.262, loss_att=39.048, acc=0.662, cer=0.431, wer=1.000, loss=40.717, time=6 minutes and 29.51 seconds, total_count=31878, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 56.54 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-11 00:41:24,676 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-11 00:41:24,684 (trainer:272) INFO: 35/50epoch started. Estimated time to finish: 3 days, 5 hours and 45 minutes +[gpub007:0/64] 2023-07-11 00:41:24,687 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub007:0/64] 2023-07-11 00:41:42,573 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 00:41:46,222 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 00:41:46,222 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-11 00:41:46,243 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 00:45:55,455 (trainer:732) INFO: 35epoch:train:1-100batch: iter_time=1.294, forward_time=0.145, loss_ctc=71.057, loss_att=58.298, acc=0.680, loss=62.126, backward_time=1.041, grad_norm=105.827, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.154e-05, train_time=5.415 +[gpub007:0/64] 2023-07-11 00:48:11,981 (trainer:732) INFO: 35epoch:train:101-200batch: iter_time=1.232e-04, forward_time=0.149, loss_ctc=65.096, loss_att=50.184, acc=0.701, loss=54.658, backward_time=1.031, grad_norm=101.520, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.153e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 00:50:28,253 (trainer:732) INFO: 35epoch:train:201-300batch: iter_time=1.219e-04, forward_time=0.144, loss_ctc=79.966, loss_att=67.116, acc=0.694, loss=70.971, backward_time=1.027, grad_norm=126.565, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.152e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 00:52:46,612 (trainer:732) INFO: 35epoch:train:301-400batch: iter_time=1.065e-04, forward_time=0.165, loss_ctc=81.643, loss_att=61.327, acc=0.691, loss=67.422, backward_time=1.033, grad_norm=183.099, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.184, optim0_lr0=6.151e-05, train_time=2.766 +[gpub007:0/64] 2023-07-11 00:55:03,601 (trainer:732) INFO: 35epoch:train:401-500batch: iter_time=1.256e-04, forward_time=0.145, loss_ctc=59.117, loss_att=40.342, acc=0.727, loss=45.975, backward_time=1.026, grad_norm=107.675, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.150e-05, train_time=2.741 +[gpub007:0/64] 2023-07-11 00:57:22,105 (trainer:732) INFO: 35epoch:train:501-600batch: iter_time=1.049e-04, forward_time=0.156, loss_ctc=72.909, loss_att=59.163, acc=0.693, loss=63.287, backward_time=1.030, grad_norm=104.951, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.149e-05, train_time=2.770 +[gpub007:0/64] 2023-07-11 01:00:03,445 (trainer:732) INFO: 35epoch:train:601-700batch: iter_time=6.204e-04, forward_time=0.224, loss_ctc=68.252, loss_att=50.145, acc=0.704, loss=55.577, backward_time=1.068, grad_norm=120.151, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.185, optim0_lr0=6.148e-05, train_time=3.227 +[gpub007:0/64] 2023-07-11 01:02:19,556 (trainer:732) INFO: 35epoch:train:701-800batch: iter_time=1.048e-04, forward_time=0.145, loss_ctc=69.526, loss_att=49.297, acc=0.715, loss=55.366, backward_time=1.029, grad_norm=101.080, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.148e-05, 
train_time=2.722 +[gpub007:0/64] 2023-07-11 01:03:11,525 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-11 01:03:29,024 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 01:03:32,635 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 01:03:32,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-11 01:03:32,642 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 01:07:39,723 (trainer:732) INFO: 35epoch:train:801-900batch: iter_time=1.439, forward_time=0.144, loss_ctc=73.510, loss_att=56.228, acc=0.691, loss=61.413, backward_time=1.043, grad_norm=116.311, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.147e-05, train_time=6.403 +[gpub007:0/64] 2023-07-11 01:09:56,084 (trainer:732) INFO: 35epoch:train:901-1000batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=67.649, loss_att=52.830, acc=0.706, loss=57.276, backward_time=1.028, grad_norm=96.165, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.146e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 01:12:12,182 (trainer:732) INFO: 35epoch:train:1001-1100batch: iter_time=1.215e-04, forward_time=0.144, loss_ctc=76.518, loss_att=62.802, acc=0.713, loss=66.917, backward_time=1.029, grad_norm=129.549, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.145e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 01:14:28,310 (trainer:732) INFO: 35epoch:train:1101-1200batch: iter_time=1.289e-04, forward_time=0.145, loss_ctc=80.484, loss_att=59.491, acc=0.703, loss=65.789, backward_time=1.029, grad_norm=135.495, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.144e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 01:16:44,028 (trainer:732) INFO: 35epoch:train:1201-1300batch: iter_time=1.366e-04, forward_time=0.146, loss_ctc=59.011, loss_att=42.280, acc=0.722, loss=47.299, backward_time=1.029, grad_norm=104.603, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.143e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 01:18:59,727 (trainer:732) INFO: 35epoch:train:1301-1400batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=70.182, loss_att=57.012, acc=0.705, loss=60.963, backward_time=1.027, grad_norm=123.304, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.142e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 01:21:15,380 (trainer:732) INFO: 35epoch:train:1401-1500batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=68.956, loss_att=51.980, acc=0.710, loss=57.073, backward_time=1.028, grad_norm=104.932, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.141e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 01:23:30,905 (trainer:732) INFO: 35epoch:train:1501-1600batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=68.553, loss_att=49.058, acc=0.721, 
loss=54.907, backward_time=1.027, grad_norm=121.556, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.140e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 01:25:09,859 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-11 01:25:28,310 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 01:25:32,079 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 01:25:32,079 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-11 01:25:32,085 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 01:29:32,231 (trainer:732) INFO: 35epoch:train:1601-1700batch: iter_time=2.182, forward_time=0.155, loss_ctc=71.780, loss_att=57.867, acc=0.690, loss=62.041, backward_time=1.042, grad_norm=134.284, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.139e-05, train_time=7.226 +[gpub007:0/64] 2023-07-11 01:31:48,376 (trainer:732) INFO: 35epoch:train:1701-1800batch: iter_time=1.265e-04, forward_time=0.143, loss_ctc=72.173, loss_att=57.303, acc=0.692, loss=61.764, backward_time=1.029, grad_norm=136.674, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.138e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 01:34:04,193 (trainer:732) INFO: 35epoch:train:1801-1900batch: iter_time=1.266e-04, forward_time=0.145, loss_ctc=66.889, loss_att=57.530, acc=0.701, loss=60.338, backward_time=1.028, grad_norm=107.858, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.137e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 01:36:20,105 (trainer:732) INFO: 35epoch:train:1901-2000batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=79.736, loss_att=62.074, acc=0.696, loss=67.372, backward_time=1.029, grad_norm=133.530, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.136e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 01:38:35,772 (trainer:732) INFO: 35epoch:train:2001-2100batch: iter_time=1.155e-04, forward_time=0.145, loss_ctc=69.250, loss_att=48.513, acc=0.719, loss=54.734, backward_time=1.027, grad_norm=105.912, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.136e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 01:40:51,437 (trainer:732) INFO: 35epoch:train:2101-2200batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=61.290, loss_att=43.960, acc=0.730, loss=49.159, backward_time=1.028, grad_norm=91.948, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.135e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 01:43:07,740 (trainer:732) INFO: 35epoch:train:2201-2300batch: iter_time=1.224e-04, forward_time=0.147, loss_ctc=73.331, loss_att=60.271, acc=0.683, loss=64.189, backward_time=1.032, grad_norm=122.389, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.134e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 01:45:23,324 
(trainer:732) INFO: 35epoch:train:2301-2400batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=67.864, loss_att=46.445, acc=0.726, loss=52.871, backward_time=1.027, grad_norm=99.322, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.133e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 01:47:39,556 (trainer:732) INFO: 35epoch:train:2401-2500batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=69.151, loss_att=50.577, acc=0.703, loss=56.149, backward_time=1.029, grad_norm=98.676, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.132e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 01:47:42,508 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-11 01:48:00,725 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 01:48:04,464 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 01:48:04,464 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-11 01:48:04,470 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 01:53:47,387 (trainer:732) INFO: 35epoch:train:2501-2600batch: iter_time=1.328, forward_time=0.146, loss_ctc=71.851, loss_att=57.620, acc=0.692, loss=61.889, backward_time=1.038, grad_norm=135.777, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.131e-05, train_time=7.356 +[gpub007:0/64] 2023-07-11 01:56:03,249 (trainer:732) INFO: 35epoch:train:2601-2700batch: iter_time=1.240e-04, forward_time=0.146, loss_ctc=67.460, loss_att=57.145, acc=0.701, loss=60.240, backward_time=1.028, grad_norm=109.035, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.130e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 01:58:18,808 (trainer:732) INFO: 35epoch:train:2701-2800batch: iter_time=1.425e-04, forward_time=0.145, loss_ctc=79.815, loss_att=61.445, acc=0.698, loss=66.956, backward_time=1.027, grad_norm=135.531, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.129e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 02:00:34,368 (trainer:732) INFO: 35epoch:train:2801-2900batch: iter_time=1.466e-04, forward_time=0.146, loss_ctc=69.560, loss_att=49.257, acc=0.717, loss=55.348, backward_time=1.027, grad_norm=106.765, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.128e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 02:02:49,793 (trainer:732) INFO: 35epoch:train:2901-3000batch: iter_time=1.322e-04, forward_time=0.145, loss_ctc=63.448, loss_att=45.096, acc=0.725, loss=50.602, backward_time=1.026, grad_norm=99.059, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.127e-05, train_time=2.708 +[gpub007:0/64] 2023-07-11 02:05:05,743 (trainer:732) INFO: 35epoch:train:3001-3100batch: iter_time=1.412e-04, forward_time=0.146, loss_ctc=71.396, loss_att=59.928, acc=0.682, loss=63.368, backward_time=1.030, grad_norm=107.658, 
clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.126e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 02:07:21,375 (trainer:732) INFO: 35epoch:train:3101-3200batch: iter_time=1.381e-04, forward_time=0.146, loss_ctc=66.404, loss_att=45.922, acc=0.727, loss=52.067, backward_time=1.026, grad_norm=129.685, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.125e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 02:09:37,025 (trainer:732) INFO: 35epoch:train:3201-3300batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=67.807, loss_att=48.720, acc=0.712, loss=54.446, backward_time=1.027, grad_norm=88.566, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.124e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 02:10:24,930 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub007:0/64] 2023-07-11 02:10:42,827 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 02:10:46,529 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 02:10:46,529 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-11 02:10:46,536 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 02:15:46,907 (trainer:732) INFO: 35epoch:train:3301-3400batch: iter_time=1.267, forward_time=0.145, loss_ctc=74.487, loss_att=61.332, acc=0.687, loss=65.278, backward_time=1.054, grad_norm=147.561, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.124e-05, train_time=7.397 +[gpub007:0/64] 2023-07-11 02:18:02,805 (trainer:732) INFO: 35epoch:train:3401-3500batch: iter_time=1.326e-04, forward_time=0.146, loss_ctc=64.728, loss_att=49.894, acc=0.720, loss=54.344, backward_time=1.028, grad_norm=102.780, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.123e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 02:20:19,359 (trainer:732) INFO: 35epoch:train:3501-3600batch: iter_time=1.329e-04, forward_time=0.148, loss_ctc=71.956, loss_att=62.087, acc=0.708, loss=65.048, backward_time=1.032, grad_norm=99.123, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.122e-05, train_time=2.731 +[gpub007:0/64] 2023-07-11 02:22:35,236 (trainer:732) INFO: 35epoch:train:3601-3700batch: iter_time=1.430e-04, forward_time=0.147, loss_ctc=73.577, loss_att=55.248, acc=0.715, loss=60.747, backward_time=1.028, grad_norm=116.857, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.121e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 02:24:50,900 (trainer:732) INFO: 35epoch:train:3701-3800batch: iter_time=1.092e-04, forward_time=0.145, loss_ctc=68.778, loss_att=48.953, acc=0.719, loss=54.901, backward_time=1.028, grad_norm=103.728, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.120e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 02:27:06,580 (trainer:732) INFO: 35epoch:train:3801-3900batch: 
iter_time=9.491e-05, forward_time=0.144, loss_ctc=67.110, loss_att=48.630, acc=0.717, loss=54.174, backward_time=1.028, grad_norm=108.396, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.119e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 02:29:22,404 (trainer:732) INFO: 35epoch:train:3901-4000batch: iter_time=1.010e-04, forward_time=0.144, loss_ctc=70.506, loss_att=58.358, acc=0.704, loss=62.003, backward_time=1.029, grad_norm=100.430, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.118e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 02:31:38,118 (trainer:732) INFO: 35epoch:train:4001-4100batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=66.568, loss_att=47.477, acc=0.724, loss=53.204, backward_time=1.027, grad_norm=101.728, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.117e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 02:33:10,343 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-11 02:33:28,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 02:33:32,026 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 02:33:32,026 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-11 02:33:32,032 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 02:36:40,228 (trainer:732) INFO: 35epoch:train:4101-4200batch: iter_time=1.286, forward_time=0.146, loss_ctc=71.481, loss_att=56.412, acc=0.697, loss=60.933, backward_time=1.044, grad_norm=119.430, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.116e-05, train_time=6.042 +[gpub007:0/64] 2023-07-11 02:38:56,733 (trainer:732) INFO: 35epoch:train:4201-4300batch: iter_time=1.221e-04, forward_time=0.147, loss_ctc=64.522, loss_att=49.545, acc=0.718, loss=54.038, backward_time=1.028, grad_norm=104.859, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.115e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 02:41:12,944 (trainer:732) INFO: 35epoch:train:4301-4400batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=71.735, loss_att=62.122, acc=0.706, loss=65.006, backward_time=1.029, grad_norm=129.940, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.114e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 02:43:29,074 (trainer:732) INFO: 35epoch:train:4401-4500batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=73.693, loss_att=57.858, acc=0.713, loss=62.609, backward_time=1.030, grad_norm=135.338, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.113e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 02:45:45,736 (trainer:732) INFO: 35epoch:train:4501-4600batch: iter_time=1.224e-04, forward_time=0.146, loss_ctc=68.255, loss_att=46.938, acc=0.721, loss=53.333, backward_time=1.031, grad_norm=102.361, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.183, 
optim0_lr0=6.113e-05, train_time=2.733 +[gpub007:0/64] 2023-07-11 02:48:01,424 (trainer:732) INFO: 35epoch:train:4601-4700batch: iter_time=1.406e-04, forward_time=0.147, loss_ctc=66.285, loss_att=48.356, acc=0.719, loss=53.735, backward_time=1.027, grad_norm=123.813, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.183, optim0_lr0=6.112e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 02:50:17,383 (trainer:732) INFO: 35epoch:train:4701-4800batch: iter_time=1.240e-04, forward_time=0.147, loss_ctc=70.652, loss_att=58.125, acc=0.705, loss=61.883, backward_time=1.029, grad_norm=112.224, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.183, optim0_lr0=6.111e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 02:52:33,237 (trainer:732) INFO: 35epoch:train:4801-4900batch: iter_time=1.285e-04, forward_time=0.147, loss_ctc=66.220, loss_att=47.069, acc=0.723, loss=52.814, backward_time=1.030, grad_norm=96.331, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.110e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 02:54:49,141 (trainer:732) INFO: 35epoch:train:4901-5000batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=74.509, loss_att=56.995, acc=0.707, loss=62.249, backward_time=1.029, grad_norm=103.038, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.109e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 02:54:52,148 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-11 02:55:10,392 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 02:55:13,906 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 02:55:13,906 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-11 02:55:13,913 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 03:00:01,615 (trainer:732) INFO: 35epoch:train:5001-5100batch: iter_time=1.327, forward_time=0.146, loss_ctc=71.986, loss_att=58.396, acc=0.695, loss=62.473, backward_time=1.042, grad_norm=124.376, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.108e-05, train_time=6.249 +[gpub007:0/64] 2023-07-11 03:02:17,312 (trainer:732) INFO: 35epoch:train:5101-5200batch: iter_time=1.390e-04, forward_time=0.146, loss_ctc=67.031, loss_att=57.719, acc=0.701, loss=60.513, backward_time=1.027, grad_norm=100.455, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.107e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 03:04:32,968 (trainer:732) INFO: 35epoch:train:5201-5300batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=79.778, loss_att=61.892, acc=0.698, loss=67.258, backward_time=1.028, grad_norm=114.980, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.106e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 03:06:48,744 (trainer:732) INFO: 35epoch:train:5301-5400batch: iter_time=1.211e-04, forward_time=0.148, loss_ctc=66.989, loss_att=47.658, 
acc=0.724, loss=53.458, backward_time=1.028, grad_norm=104.013, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.105e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 03:09:04,095 (trainer:732) INFO: 35epoch:train:5401-5500batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=60.910, loss_att=44.817, acc=0.727, loss=49.645, backward_time=1.026, grad_norm=127.532, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.104e-05, train_time=2.707 +[gpub007:0/64] 2023-07-11 03:11:21,842 (trainer:732) INFO: 35epoch:train:5501-5600batch: iter_time=1.336e-04, forward_time=0.146, loss_ctc=71.648, loss_att=59.847, acc=0.684, loss=63.387, backward_time=1.027, grad_norm=118.805, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.103e-05, train_time=2.755 +[gpub007:0/64] 2023-07-11 03:13:37,919 (trainer:732) INFO: 35epoch:train:5601-5700batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=66.025, loss_att=45.866, acc=0.725, loss=51.914, backward_time=1.027, grad_norm=97.475, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.103e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 03:15:53,756 (trainer:732) INFO: 35epoch:train:5701-5800batch: iter_time=1.177e-04, forward_time=0.144, loss_ctc=69.656, loss_att=49.274, acc=0.710, loss=55.389, backward_time=1.028, grad_norm=99.336, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.102e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 03:16:42,544 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-11 03:17:00,448 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 03:17:03,895 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 03:17:03,895 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-11 03:17:03,901 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 03:23:45,879 (trainer:732) INFO: 35epoch:train:5801-5900batch: iter_time=1.271, forward_time=0.166, loss_ctc=74.619, loss_att=61.761, acc=0.688, loss=65.619, backward_time=1.044, grad_norm=127.900, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.183, optim0_lr0=6.101e-05, train_time=9.442 +[gpub007:0/64] 2023-07-11 03:26:02,073 (trainer:732) INFO: 35epoch:train:5901-6000batch: iter_time=1.109e-04, forward_time=0.146, loss_ctc=65.728, loss_att=52.202, acc=0.712, loss=56.260, backward_time=1.028, grad_norm=107.061, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.100e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 03:28:18,432 (trainer:732) INFO: 35epoch:train:6001-6100batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=72.553, loss_att=63.561, acc=0.703, loss=66.258, backward_time=1.030, grad_norm=112.176, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.099e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 03:30:36,801 
(trainer:732) INFO: 35epoch:train:6101-6200batch: iter_time=1.182e-04, forward_time=0.145, loss_ctc=72.426, loss_att=55.205, acc=0.715, loss=60.371, backward_time=1.029, grad_norm=114.239, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.098e-05, train_time=2.767 +[gpub007:0/64] 2023-07-11 03:32:52,419 (trainer:732) INFO: 35epoch:train:6201-6300batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=67.725, loss_att=48.404, acc=0.722, loss=54.200, backward_time=1.027, grad_norm=115.081, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.097e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 03:35:08,194 (trainer:732) INFO: 35epoch:train:6301-6400batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=65.164, loss_att=46.345, acc=0.723, loss=51.991, backward_time=1.027, grad_norm=111.594, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.181, optim0_lr0=6.096e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 03:37:24,211 (trainer:732) INFO: 35epoch:train:6401-6500batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=71.981, loss_att=59.921, acc=0.706, loss=63.539, backward_time=1.029, grad_norm=137.257, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.181, optim0_lr0=6.095e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 03:39:48,608 (trainer:732) INFO: 35epoch:train:6501-6600batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=65.220, loss_att=44.876, acc=0.731, loss=50.979, backward_time=1.037, grad_norm=106.366, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.181, optim0_lr0=6.094e-05, train_time=2.888 +[gpub007:0/64] 2023-07-11 03:41:34,779 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-11 03:41:53,179 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 03:41:56,570 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 03:41:56,570 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub007:0/64] 2023-07-11 03:41:56,576 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 03:47:08,821 (trainer:732) INFO: 35epoch:train:6601-6700batch: iter_time=1.269, forward_time=0.145, loss_ctc=65.010, loss_att=50.773, acc=0.703, loss=55.044, backward_time=1.061, grad_norm=98.507, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.093e-05, train_time=8.804 +[gpub007:0/64] 2023-07-11 03:49:25,568 (trainer:732) INFO: 35epoch:train:6701-6800batch: iter_time=1.340e-04, forward_time=0.146, loss_ctc=69.659, loss_att=57.098, acc=0.700, loss=60.867, backward_time=1.033, grad_norm=107.049, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.093e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 03:51:41,501 (trainer:732) INFO: 35epoch:train:6801-6900batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=66.918, loss_att=56.542, acc=0.704, loss=59.655, backward_time=1.026, grad_norm=111.766, clip=100.000, 
loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.092e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 03:53:57,200 (trainer:732) INFO: 35epoch:train:6901-7000batch: iter_time=1.077e-04, forward_time=0.144, loss_ctc=76.672, loss_att=58.767, acc=0.703, loss=64.139, backward_time=1.025, grad_norm=135.940, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.091e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 03:56:12,939 (trainer:732) INFO: 35epoch:train:7001-7100batch: iter_time=1.367e-04, forward_time=0.145, loss_ctc=67.771, loss_att=49.998, acc=0.721, loss=55.330, backward_time=1.027, grad_norm=116.335, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.090e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 03:58:28,536 (trainer:732) INFO: 35epoch:train:7101-7200batch: iter_time=1.583e-04, forward_time=0.146, loss_ctc=60.932, loss_att=43.590, acc=0.730, loss=48.792, backward_time=1.027, grad_norm=117.125, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.089e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 04:00:44,414 (trainer:732) INFO: 35epoch:train:7201-7300batch: iter_time=1.382e-04, forward_time=0.147, loss_ctc=70.503, loss_att=59.078, acc=0.691, loss=62.506, backward_time=1.028, grad_norm=134.477, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.088e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 04:03:00,131 (trainer:732) INFO: 35epoch:train:7301-7400batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=65.698, loss_att=45.779, acc=0.725, loss=51.755, backward_time=1.027, grad_norm=139.588, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.087e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 04:05:15,774 (trainer:732) INFO: 35epoch:train:7401-7500batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=68.809, loss_att=49.404, acc=0.712, loss=55.225, backward_time=1.026, grad_norm=126.521, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.086e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 04:05:18,451 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub007:0/64] 2023-07-11 04:05:36,460 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 04:05:39,891 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 04:05:39,892 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-11 04:05:39,898 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 04:12:39,450 (trainer:732) INFO: 35epoch:train:7501-7600batch: iter_time=1.257, forward_time=0.146, loss_ctc=66.129, loss_att=55.947, acc=0.694, loss=59.002, backward_time=1.042, grad_norm=109.742, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.085e-05, train_time=8.873 +[gpub007:0/64] 2023-07-11 04:14:55,636 (trainer:732) INFO: 35epoch:train:7601-7700batch: iter_time=1.305e-04, forward_time=0.145, loss_ctc=64.633, loss_att=48.882, acc=0.709, loss=53.607, backward_time=1.027, grad_norm=95.094, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.084e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 04:17:19,919 (trainer:732) INFO: 35epoch:train:7701-7800batch: iter_time=1.123e-04, forward_time=0.146, loss_ctc=76.885, loss_att=63.661, acc=0.710, loss=67.628, backward_time=1.049, grad_norm=104.271, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.084e-05, train_time=2.885 +[gpub007:0/64] 2023-07-11 04:19:36,871 (trainer:732) INFO: 35epoch:train:7801-7900batch: iter_time=1.249e-04, forward_time=0.152, loss_ctc=75.513, loss_att=56.761, acc=0.706, loss=62.387, backward_time=1.031, grad_norm=135.305, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.083e-05, train_time=2.739 +[gpub007:0/64] 2023-07-11 04:22:01,593 (trainer:732) INFO: 35epoch:train:7901-8000batch: iter_time=1.314e-04, forward_time=0.146, loss_ctc=59.219, loss_att=40.744, acc=0.730, loss=46.287, backward_time=1.033, grad_norm=94.732, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.082e-05, train_time=2.894 +[gpub007:0/64] 2023-07-11 04:24:17,955 (trainer:732) INFO: 35epoch:train:8001-8100batch: iter_time=1.331e-04, forward_time=0.147, loss_ctc=70.557, loss_att=57.207, acc=0.705, loss=61.212, backward_time=1.030, grad_norm=106.149, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.081e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 04:26:33,643 (trainer:732) INFO: 35epoch:train:8101-8200batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=67.643, loss_att=50.416, acc=0.707, loss=55.584, backward_time=1.025, grad_norm=93.448, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.080e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 04:28:50,730 (trainer:732) INFO: 35epoch:train:8201-8300batch: iter_time=1.128e-04, forward_time=0.156, loss_ctc=67.530, loss_att=48.098, acc=0.723, loss=53.928, backward_time=1.026, grad_norm=100.965, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, 
optim0_lr0=6.079e-05, train_time=2.742 +[gpub007:0/64] 2023-07-11 04:29:45,219 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub007:0/64] 2023-07-11 04:30:02,950 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 04:30:06,302 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 04:30:06,302 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-11 04:30:06,309 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 04:34:27,849 (trainer:732) INFO: 35epoch:train:8301-8400batch: iter_time=1.424, forward_time=0.145, loss_ctc=68.160, loss_att=54.907, acc=0.694, loss=58.883, backward_time=1.057, grad_norm=110.656, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.078e-05, train_time=6.742 +[gpub007:0/64] 2023-07-11 04:36:44,477 (trainer:732) INFO: 35epoch:train:8401-8500batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=64.626, loss_att=49.754, acc=0.719, loss=54.216, backward_time=1.027, grad_norm=101.741, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.077e-05, train_time=2.732 +[gpub007:0/64] 2023-07-11 04:39:00,962 (trainer:732) INFO: 35epoch:train:8501-8600batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=72.339, loss_att=62.021, acc=0.709, loss=65.116, backward_time=1.029, grad_norm=105.363, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.076e-05, train_time=2.729 +[gpub007:0/64] 2023-07-11 04:41:19,932 (trainer:732) INFO: 35epoch:train:8601-8700batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=73.102, loss_att=55.859, acc=0.717, loss=61.032, backward_time=1.042, grad_norm=112.970, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.075e-05, train_time=2.779 +[gpub007:0/64] 2023-07-11 04:43:35,769 (trainer:732) INFO: 35epoch:train:8701-8800batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=68.061, loss_att=48.807, acc=0.721, loss=54.583, backward_time=1.026, grad_norm=100.965, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.075e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 04:45:52,213 (trainer:732) INFO: 35epoch:train:8801-8900batch: iter_time=1.283e-04, forward_time=0.146, loss_ctc=65.514, loss_att=48.609, acc=0.722, loss=53.680, backward_time=1.028, grad_norm=121.253, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.074e-05, train_time=2.729 +[gpub007:0/64] 2023-07-11 04:48:09,179 (trainer:732) INFO: 35epoch:train:8901-9000batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=69.248, loss_att=57.874, acc=0.712, loss=61.286, backward_time=1.031, grad_norm=124.270, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.073e-05, train_time=2.739 +[gpub007:0/64] 2023-07-11 04:50:25,074 (trainer:732) INFO: 35epoch:train:9001-9100batch: iter_time=1.222e-04, forward_time=0.146, loss_ctc=65.436, 
loss_att=45.658, acc=0.729, loss=51.591, backward_time=1.028, grad_norm=102.594, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.072e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 04:51:57,364 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-11 04:52:15,147 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 04:52:18,798 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 04:52:18,798 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 04:52:18,804 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 04:56:08,511 (trainer:732) INFO: 35epoch:train:9101-9200batch: iter_time=1.343, forward_time=0.145, loss_ctc=65.095, loss_att=50.281, acc=0.712, loss=54.725, backward_time=1.037, grad_norm=103.197, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.071e-05, train_time=6.869 +[gpub007:0/64] 2023-07-11 04:58:25,926 (trainer:732) INFO: 35epoch:train:9201-9300batch: iter_time=9.774e-05, forward_time=0.146, loss_ctc=68.834, loss_att=56.585, acc=0.710, loss=60.259, backward_time=1.033, grad_norm=120.324, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.070e-05, train_time=2.748 +[gpub007:0/64] 2023-07-11 05:00:42,775 (trainer:732) INFO: 35epoch:train:9301-9400batch: iter_time=9.473e-05, forward_time=0.146, loss_ctc=66.220, loss_att=54.518, acc=0.715, loss=58.028, backward_time=1.030, grad_norm=109.350, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.069e-05, train_time=2.737 +[gpub007:0/64] 2023-07-11 05:02:59,833 (trainer:732) INFO: 35epoch:train:9401-9500batch: iter_time=1.256e-04, forward_time=0.146, loss_ctc=77.266, loss_att=59.688, acc=0.715, loss=64.961, backward_time=1.029, grad_norm=126.954, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.068e-05, train_time=2.741 +[gpub007:0/64] 2023-07-11 05:05:15,709 (trainer:732) INFO: 35epoch:train:9501-9600batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=68.145, loss_att=47.616, acc=0.729, loss=53.775, backward_time=1.026, grad_norm=101.105, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.181, optim0_lr0=6.067e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 05:07:31,651 (trainer:732) INFO: 35epoch:train:9601-9700batch: iter_time=1.342e-04, forward_time=0.147, loss_ctc=60.957, loss_att=45.078, acc=0.729, loss=49.842, backward_time=1.028, grad_norm=104.035, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.066e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 05:09:47,772 (trainer:732) INFO: 35epoch:train:9701-9800batch: iter_time=1.304e-04, forward_time=0.147, loss_ctc=71.986, loss_att=59.815, acc=0.696, loss=63.466, backward_time=1.029, grad_norm=104.690, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.066e-05, train_time=2.722 +[gpub007:0/64] 
2023-07-11 05:12:03,376 (trainer:732) INFO: 35epoch:train:9801-9900batch: iter_time=1.380e-04, forward_time=0.146, loss_ctc=65.894, loss_att=45.906, acc=0.732, loss=51.903, backward_time=1.027, grad_norm=97.625, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.065e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 05:14:18,858 (trainer:732) INFO: 35epoch:train:9901-10000batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=68.837, loss_att=49.337, acc=0.716, loss=55.187, backward_time=1.025, grad_norm=106.684, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.064e-05, train_time=2.709 +[gpub007:0/64] 2023-07-11 05:27:42,494 (trainer:338) INFO: 35epoch results: [train] iter_time=0.167, forward_time=0.147, loss_ctc=69.359, loss_att=53.354, acc=0.710, loss=58.156, backward_time=1.031, grad_norm=113.328, clip=100.000, loss_scale=8.240e+29, optim_step_time=0.182, optim0_lr0=6.109e-05, train_time=3.275, time=4 hours, 33 minutes and 9.74 seconds, total_count=320000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=45.896, cer_ctc=0.263, loss_att=40.346, acc=0.659, cer=0.430, wer=1.000, loss=42.011, time=7 minutes and 12.43 seconds, total_count=32890, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 55.64 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-11 05:27:58,165 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-11 05:27:58,173 (trainer:272) INFO: 36/50epoch started. Estimated time to finish: 3 days, 38 minutes and 32.8 seconds +[gpub007:0/64] 2023-07-11 05:27:58,177 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub007:0/64] 2023-07-11 05:28:15,921 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 05:28:19,329 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 05:28:19,329 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-11 05:28:19,335 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 05:32:32,282 (trainer:732) INFO: 36epoch:train:1-100batch: iter_time=1.316, forward_time=0.184, loss_ctc=69.734, loss_att=51.362, acc=0.707, loss=56.874, backward_time=1.037, grad_norm=139.808, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.063e-05, train_time=5.482 +[gpub007:0/64] 2023-07-11 05:34:48,427 (trainer:732) INFO: 36epoch:train:101-200batch: iter_time=1.109e-04, forward_time=0.144, loss_ctc=80.713, loss_att=61.964, acc=0.695, loss=67.589, backward_time=1.029, grad_norm=120.849, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.062e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 05:37:06,954 (trainer:732) INFO: 36epoch:train:201-300batch: iter_time=1.103e-04, forward_time=0.145, loss_ctc=78.128, loss_att=57.601, acc=0.703, loss=63.759, backward_time=1.029, grad_norm=114.190, clip=100.000, 
loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.061e-05, train_time=2.772 +[gpub007:0/64] 2023-07-11 05:39:23,721 (trainer:732) INFO: 36epoch:train:301-400batch: iter_time=1.063e-04, forward_time=0.146, loss_ctc=77.415, loss_att=56.917, acc=0.706, loss=63.066, backward_time=1.029, grad_norm=111.814, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.060e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 05:41:39,536 (trainer:732) INFO: 36epoch:train:401-500batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=73.724, loss_att=55.345, acc=0.711, loss=60.858, backward_time=1.028, grad_norm=107.701, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.059e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 05:43:58,196 (trainer:732) INFO: 36epoch:train:501-600batch: iter_time=1.238e-04, forward_time=0.150, loss_ctc=78.186, loss_att=58.449, acc=0.692, loss=64.370, backward_time=1.031, grad_norm=130.512, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.058e-05, train_time=2.773 +[gpub007:0/64] 2023-07-11 05:46:21,232 (trainer:732) INFO: 36epoch:train:601-700batch: iter_time=6.214e-04, forward_time=0.163, loss_ctc=75.254, loss_att=57.096, acc=0.719, loss=62.543, backward_time=1.037, grad_norm=146.823, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.184, optim0_lr0=6.058e-05, train_time=2.861 +[gpub007:0/64] 2023-07-11 05:48:40,509 (trainer:732) INFO: 36epoch:train:701-800batch: iter_time=1.389e-04, forward_time=0.147, loss_ctc=80.020, loss_att=50.423, acc=0.711, loss=59.303, backward_time=1.033, grad_norm=145.153, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.057e-05, train_time=2.785 +[gpub007:0/64] 2023-07-11 05:49:34,320 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub007:0/64] 2023-07-11 05:49:52,099 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 05:49:55,493 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 05:49:55,493 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-11 05:49:55,501 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 05:54:40,956 (trainer:732) INFO: 36epoch:train:801-900batch: iter_time=1.630, forward_time=0.145, loss_ctc=72.961, loss_att=50.532, acc=0.726, loss=57.261, backward_time=1.036, grad_norm=104.979, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.056e-05, train_time=7.209 +[gpub007:0/64] 2023-07-11 05:56:58,421 (trainer:732) INFO: 36epoch:train:901-1000batch: iter_time=1.194e-04, forward_time=0.148, loss_ctc=65.491, loss_att=53.826, acc=0.697, loss=57.326, backward_time=1.031, grad_norm=98.829, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.055e-05, train_time=2.749 +[gpub007:0/64] 2023-07-11 05:59:14,432 (trainer:732) INFO: 36epoch:train:1001-1100batch: iter_time=1.306e-04, forward_time=0.146, loss_ctc=85.174, loss_att=62.650, acc=0.698, loss=69.407, backward_time=1.028, grad_norm=124.007, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.054e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 06:01:30,579 (trainer:732) INFO: 36epoch:train:1101-1200batch: iter_time=1.190e-04, forward_time=0.146, loss_ctc=73.713, loss_att=53.317, acc=0.711, loss=59.436, backward_time=1.030, grad_norm=124.941, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.053e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 06:03:46,675 (trainer:732) INFO: 36epoch:train:1201-1300batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=80.947, loss_att=59.264, acc=0.705, loss=65.769, backward_time=1.030, grad_norm=114.469, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.052e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 06:06:02,340 (trainer:732) INFO: 36epoch:train:1301-1400batch: iter_time=1.293e-04, forward_time=0.145, loss_ctc=71.894, loss_att=53.652, acc=0.707, loss=59.124, backward_time=1.027, grad_norm=118.071, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.051e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 06:08:18,168 (trainer:732) INFO: 36epoch:train:1401-1500batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=75.819, loss_att=58.327, acc=0.708, loss=63.575, backward_time=1.029, grad_norm=113.417, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.050e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 06:10:33,875 (trainer:732) INFO: 36epoch:train:1501-1600batch: iter_time=1.321e-04, forward_time=0.145, loss_ctc=80.999, loss_att=55.306, acc=0.707, loss=63.014, backward_time=1.028, grad_norm=141.020, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, 
optim0_lr0=6.050e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 06:12:07,718 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-11 06:12:25,843 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 06:12:29,298 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 06:12:29,298 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 06:12:29,304 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 06:15:38,958 (trainer:732) INFO: 36epoch:train:1601-1700batch: iter_time=1.267, forward_time=0.145, loss_ctc=73.839, loss_att=51.751, acc=0.721, loss=58.377, backward_time=1.041, grad_norm=136.783, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.049e-05, train_time=6.101 +[gpub007:0/64] 2023-07-11 06:17:55,585 (trainer:732) INFO: 36epoch:train:1701-1800batch: iter_time=1.134e-04, forward_time=0.147, loss_ctc=71.407, loss_att=52.192, acc=0.710, loss=57.956, backward_time=1.032, grad_norm=120.304, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.048e-05, train_time=2.732 +[gpub007:0/64] 2023-07-11 06:20:12,511 (trainer:732) INFO: 36epoch:train:1801-1900batch: iter_time=1.109e-04, forward_time=0.147, loss_ctc=76.794, loss_att=59.603, acc=0.703, loss=64.761, backward_time=1.033, grad_norm=113.446, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.047e-05, train_time=2.738 +[gpub007:0/64] 2023-07-11 06:22:28,497 (trainer:732) INFO: 36epoch:train:1901-2000batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=78.621, loss_att=58.117, acc=0.704, loss=64.269, backward_time=1.030, grad_norm=131.981, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.046e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 06:24:44,371 (trainer:732) INFO: 36epoch:train:2001-2100batch: iter_time=1.327e-04, forward_time=0.146, loss_ctc=74.860, loss_att=55.204, acc=0.705, loss=61.101, backward_time=1.026, grad_norm=125.730, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.045e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 06:27:00,293 (trainer:732) INFO: 36epoch:train:2101-2200batch: iter_time=1.321e-04, forward_time=0.147, loss_ctc=76.349, loss_att=54.688, acc=0.716, loss=61.186, backward_time=1.028, grad_norm=110.411, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.044e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 06:29:15,913 (trainer:732) INFO: 36epoch:train:2201-2300batch: iter_time=1.225e-04, forward_time=0.145, loss_ctc=75.174, loss_att=57.150, acc=0.697, loss=62.558, backward_time=1.027, grad_norm=114.348, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.043e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 06:31:31,591 (trainer:732) INFO: 36epoch:train:2301-2400batch: iter_time=1.299e-04, forward_time=0.145, loss_ctc=73.120, 
loss_att=56.115, acc=0.721, loss=61.216, backward_time=1.027, grad_norm=117.121, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.043e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 06:33:46,846 (trainer:732) INFO: 36epoch:train:2401-2500batch: iter_time=1.153e-04, forward_time=0.145, loss_ctc=75.214, loss_att=48.631, acc=0.711, loss=56.606, backward_time=1.024, grad_norm=108.186, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.042e-05, train_time=2.705 +[gpub007:0/64] 2023-07-11 06:33:49,488 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-11 06:34:07,811 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 06:34:11,276 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 06:34:11,276 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-11 06:34:11,283 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 06:38:59,026 (trainer:732) INFO: 36epoch:train:2501-2600batch: iter_time=1.329, forward_time=0.189, loss_ctc=76.688, loss_att=57.174, acc=0.711, loss=63.028, backward_time=1.043, grad_norm=146.876, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.184, optim0_lr0=6.041e-05, train_time=6.243 +[gpub007:0/64] 2023-07-11 06:41:14,869 (trainer:732) INFO: 36epoch:train:2601-2700batch: iter_time=1.375e-04, forward_time=0.146, loss_ctc=66.348, loss_att=52.670, acc=0.698, loss=56.774, backward_time=1.028, grad_norm=103.966, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.040e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 06:43:30,832 (trainer:732) INFO: 36epoch:train:2701-2800batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=80.891, loss_att=59.854, acc=0.696, loss=66.166, backward_time=1.030, grad_norm=137.370, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.039e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 06:45:46,669 (trainer:732) INFO: 36epoch:train:2801-2900batch: iter_time=1.504e-04, forward_time=0.147, loss_ctc=75.199, loss_att=54.915, acc=0.700, loss=61.000, backward_time=1.029, grad_norm=118.717, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.038e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 06:48:02,159 (trainer:732) INFO: 36epoch:train:2901-3000batch: iter_time=1.528e-04, forward_time=0.145, loss_ctc=80.228, loss_att=58.977, acc=0.700, loss=65.352, backward_time=1.027, grad_norm=129.205, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.037e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 06:50:18,814 (trainer:732) INFO: 36epoch:train:3001-3100batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=70.750, loss_att=52.963, acc=0.695, loss=58.299, backward_time=1.027, grad_norm=118.744, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.036e-05, train_time=2.733 +[gpub007:0/64] 
2023-07-11 06:52:34,819 (trainer:732) INFO: 36epoch:train:3101-3200batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=76.995, loss_att=59.563, acc=0.703, loss=64.793, backward_time=1.029, grad_norm=122.292, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.035e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 06:54:50,442 (trainer:732) INFO: 36epoch:train:3201-3300batch: iter_time=1.269e-04, forward_time=0.145, loss_ctc=74.668, loss_att=50.939, acc=0.720, loss=58.057, backward_time=1.027, grad_norm=106.152, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.035e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 06:55:38,347 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub007:0/64] 2023-07-11 06:55:56,625 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 06:56:00,075 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 06:56:00,075 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub007:0/64] 2023-07-11 06:56:00,081 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 07:01:43,152 (trainer:732) INFO: 36epoch:train:3301-3400batch: iter_time=1.268, forward_time=0.146, loss_ctc=79.213, loss_att=54.245, acc=0.708, loss=61.735, backward_time=1.041, grad_norm=121.553, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.034e-05, train_time=8.254
+[gpub007:0/64] 2023-07-11 07:03:59,291 (trainer:732) INFO: 36epoch:train:3401-3500batch: iter_time=1.290e-04, forward_time=0.146, loss_ctc=67.393, loss_att=52.270, acc=0.708, loss=56.807, backward_time=1.029, grad_norm=102.671, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.033e-05, train_time=2.723
+[gpub007:0/64] 2023-07-11 07:06:14,976 (trainer:732) INFO: 36epoch:train:3501-3600batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=76.608, loss_att=55.544, acc=0.695, loss=61.863, backward_time=1.026, grad_norm=119.748, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.032e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 07:08:30,641 (trainer:732) INFO: 36epoch:train:3601-3700batch: iter_time=1.285e-04, forward_time=0.147, loss_ctc=77.771, loss_att=59.178, acc=0.704, loss=64.756, backward_time=1.027, grad_norm=117.343, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.031e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 07:10:46,453 (trainer:732) INFO: 36epoch:train:3701-3800batch: iter_time=1.380e-04, forward_time=0.147, loss_ctc=76.208, loss_att=56.389, acc=0.706, loss=62.335, backward_time=1.027, grad_norm=130.722, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.030e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 07:13:02,225 (trainer:732) INFO: 36epoch:train:3801-3900batch: iter_time=1.256e-04, forward_time=0.147, loss_ctc=73.665, loss_att=51.564, acc=0.706, loss=58.194, backward_time=1.027, grad_norm=145.291, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.029e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 07:15:20,931 (trainer:732) INFO: 36epoch:train:3901-4000batch: iter_time=1.107e-04, forward_time=0.145, loss_ctc=76.136, loss_att=59.983, acc=0.694, loss=64.829, backward_time=1.030, grad_norm=125.475, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.028e-05, train_time=2.774
+[gpub007:0/64] 2023-07-11 07:17:39,066 (trainer:732) INFO: 36epoch:train:4001-4100batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=72.278, loss_att=51.631, acc=0.721, loss=57.825, backward_time=1.029, grad_norm=138.892, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.028e-05, train_time=2.762
+[gpub007:0/64] 2023-07-11 07:19:11,306 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub007:0/64] 2023-07-11 07:19:29,501 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 07:19:32,917 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 07:19:32,917 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub007:0/64] 2023-07-11 07:19:32,924 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 07:23:49,717 (trainer:732) INFO: 36epoch:train:4101-4200batch: iter_time=1.258, forward_time=0.145, loss_ctc=74.806, loss_att=47.319, acc=0.718, loss=55.565, backward_time=1.037, grad_norm=121.847, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.027e-05, train_time=7.413
+[gpub007:0/64] 2023-07-11 07:26:05,910 (trainer:732) INFO: 36epoch:train:4201-4300batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=67.421, loss_att=54.532, acc=0.698, loss=58.398, backward_time=1.027, grad_norm=116.431, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.026e-05, train_time=2.724
+[gpub007:0/64] 2023-07-11 07:28:21,761 (trainer:732) INFO: 36epoch:train:4301-4400batch: iter_time=1.398e-04, forward_time=0.146, loss_ctc=75.972, loss_att=53.682, acc=0.705, loss=60.369, backward_time=1.029, grad_norm=138.981, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.025e-05, train_time=2.717
+[gpub007:0/64] 2023-07-11 07:30:44,763 (trainer:732) INFO: 36epoch:train:4401-4500batch: iter_time=1.359e-04, forward_time=0.146, loss_ctc=77.323, loss_att=58.094, acc=0.705, loss=63.862, backward_time=1.033, grad_norm=123.716, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.024e-05, train_time=2.860
+[gpub007:0/64] 2023-07-11 07:33:00,662 (trainer:732) INFO: 36epoch:train:4501-4600batch: iter_time=1.484e-04, forward_time=0.146, loss_ctc=74.462, loss_att=56.780, acc=0.705, loss=62.084, backward_time=1.028, grad_norm=113.395, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.023e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 07:35:16,310 (trainer:732) INFO: 36epoch:train:4601-4700batch: iter_time=1.352e-04, forward_time=0.146, loss_ctc=73.043, loss_att=51.398, acc=0.707, loss=57.892, backward_time=1.026, grad_norm=111.405, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.022e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 07:37:35,085 (trainer:732) INFO: 36epoch:train:4701-4800batch: iter_time=1.391e-04, forward_time=0.146, loss_ctc=77.230, loss_att=60.521, acc=0.690, loss=65.534, backward_time=1.034, grad_norm=150.062, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.021e-05, train_time=2.775
+[gpub007:0/64] 2023-07-11 07:39:52,824 (trainer:732) INFO: 36epoch:train:4801-4900batch: iter_time=1.407e-04, forward_time=0.146, loss_ctc=74.151, loss_att=52.223, acc=0.720, loss=58.801, backward_time=1.029, grad_norm=104.004, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.021e-05, train_time=2.755
+[gpub007:0/64] 2023-07-11 07:42:08,392 (trainer:732) INFO: 36epoch:train:4901-5000batch: iter_time=1.246e-04, forward_time=0.145, loss_ctc=73.016, loss_att=47.706, acc=0.721, loss=55.299, backward_time=1.026, grad_norm=132.437, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.020e-05, train_time=2.711
+[gpub007:0/64] 2023-07-11 07:42:11,107 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub007:0/64] 2023-07-11 07:42:29,209 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 07:42:32,641 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 07:42:32,641 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub007:0/64] 2023-07-11 07:42:32,647 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 07:49:39,825 (trainer:732) INFO: 36epoch:train:5001-5100batch: iter_time=1.267, forward_time=0.146, loss_ctc=78.724, loss_att=57.628, acc=0.719, loss=63.957, backward_time=1.046, grad_norm=119.534, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.019e-05, train_time=9.028
+[gpub007:0/64] 2023-07-11 07:51:56,385 (trainer:732) INFO: 36epoch:train:5101-5200batch: iter_time=1.157e-04, forward_time=0.147, loss_ctc=64.415, loss_att=53.255, acc=0.704, loss=56.603, backward_time=1.029, grad_norm=103.847, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.018e-05, train_time=2.731
+[gpub007:0/64] 2023-07-11 07:54:12,434 (trainer:732) INFO: 36epoch:train:5201-5300batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=80.164, loss_att=59.913, acc=0.708, loss=65.989, backward_time=1.030, grad_norm=142.835, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.017e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 07:56:28,335 (trainer:732) INFO: 36epoch:train:5301-5400batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=76.029, loss_att=54.350, acc=0.712, loss=60.854, backward_time=1.029, grad_norm=131.820, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.016e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 07:58:44,536 (trainer:732) INFO: 36epoch:train:5401-5500batch: iter_time=1.224e-04, forward_time=0.148, loss_ctc=79.618, loss_att=58.225, acc=0.712, loss=64.643, backward_time=1.032, grad_norm=114.021, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.015e-05, train_time=2.724
+[gpub007:0/64] 2023-07-11 08:01:00,432 (trainer:732) INFO: 36epoch:train:5501-5600batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=69.623, loss_att=51.254, acc=0.715, loss=56.764, backward_time=1.030, grad_norm=97.814, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.014e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 08:03:16,345 (trainer:732) INFO: 36epoch:train:5601-5700batch: iter_time=1.173e-04, forward_time=0.147, loss_ctc=74.629, loss_att=59.207, acc=0.711, loss=63.834, backward_time=1.029, grad_norm=137.281, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.014e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 08:05:31,881 (trainer:732) INFO: 36epoch:train:5701-5800batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=74.023, loss_att=51.328, acc=0.720, loss=58.136, backward_time=1.027, grad_norm=124.204, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.013e-05, train_time=2.710
+[gpub007:0/64] 2023-07-11 08:06:19,407 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub007:0/64] 2023-07-11 08:06:37,660 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 08:06:41,253 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 08:06:41,253 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub007:0/64] 2023-07-11 08:06:41,260 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 08:11:40,565 (trainer:732) INFO: 36epoch:train:5801-5900batch: iter_time=1.275, forward_time=0.145, loss_ctc=72.674, loss_att=48.868, acc=0.722, loss=56.010, backward_time=1.038, grad_norm=121.683, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.012e-05, train_time=7.373
+[gpub007:0/64] 2023-07-11 08:13:56,479 (trainer:732) INFO: 36epoch:train:5901-6000batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=64.741, loss_att=51.696, acc=0.702, loss=55.609, backward_time=1.028, grad_norm=137.426, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.011e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 08:16:12,398 (trainer:732) INFO: 36epoch:train:6001-6100batch: iter_time=1.097e-04, forward_time=0.145, loss_ctc=84.266, loss_att=61.492, acc=0.695, loss=68.324, backward_time=1.027, grad_norm=129.812, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.010e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 08:18:31,179 (trainer:732) INFO: 36epoch:train:6101-6200batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=71.841, loss_att=52.502, acc=0.709, loss=58.303, backward_time=1.029, grad_norm=128.818, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.009e-05, train_time=2.775
+[gpub007:0/64] 2023-07-11 08:20:49,113 (trainer:732) INFO: 36epoch:train:6201-6300batch: iter_time=1.129e-04, forward_time=0.145, loss_ctc=78.640, loss_att=57.297, acc=0.706, loss=63.700, backward_time=1.030, grad_norm=167.589, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.008e-05, train_time=2.758
+[gpub007:0/64] 2023-07-11 08:23:04,818 (trainer:732) INFO: 36epoch:train:6301-6400batch: iter_time=1.104e-04, forward_time=0.146, loss_ctc=70.718, loss_att=51.754, acc=0.705, loss=57.443, backward_time=1.027, grad_norm=105.437, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.008e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 08:25:20,636 (trainer:732) INFO: 36epoch:train:6401-6500batch: iter_time=1.199e-04, forward_time=0.144, loss_ctc=72.706, loss_att=55.976, acc=0.710, loss=60.995, backward_time=1.028, grad_norm=115.061, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.007e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 08:27:38,686 (trainer:732) INFO: 36epoch:train:6501-6600batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=77.437, loss_att=52.920, acc=0.717, loss=60.275, backward_time=1.028, grad_norm=117.364, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.006e-05, train_time=2.761
+[gpub007:0/64] 2023-07-11 08:29:13,934 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub007:0/64] 2023-07-11 08:29:32,525 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 08:29:35,949 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 08:29:35,949 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub007:0/64] 2023-07-11 08:29:35,955 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 08:33:46,466 (trainer:732) INFO: 36epoch:train:6601-6700batch: iter_time=1.284, forward_time=0.145, loss_ctc=72.482, loss_att=51.440, acc=0.725, loss=57.752, backward_time=1.041, grad_norm=109.742, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.005e-05, train_time=7.355
+[gpub007:0/64] 2023-07-11 08:36:02,862 (trainer:732) INFO: 36epoch:train:6701-6800batch: iter_time=1.229e-04, forward_time=0.145, loss_ctc=69.482, loss_att=51.169, acc=0.717, loss=56.663, backward_time=1.030, grad_norm=117.403, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.004e-05, train_time=2.728
+[gpub007:0/64] 2023-07-11 08:38:19,556 (trainer:732) INFO: 36epoch:train:6801-6900batch: iter_time=1.265e-04, forward_time=0.146, loss_ctc=74.186, loss_att=59.903, acc=0.706, loss=64.188, backward_time=1.029, grad_norm=143.346, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.003e-05, train_time=2.734
+[gpub007:0/64] 2023-07-11 08:40:35,268 (trainer:732) INFO: 36epoch:train:6901-7000batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=77.050, loss_att=55.515, acc=0.713, loss=61.976, backward_time=1.028, grad_norm=124.246, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.002e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 08:42:51,239 (trainer:732) INFO: 36epoch:train:7001-7100batch: iter_time=1.124e-04, forward_time=0.147, loss_ctc=74.248, loss_att=54.964, acc=0.712, loss=60.749, backward_time=1.029, grad_norm=103.031, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.001e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 08:45:09,725 (trainer:732) INFO: 36epoch:train:7101-7200batch: iter_time=1.113e-04, forward_time=0.147, loss_ctc=74.611, loss_att=54.431, acc=0.722, loss=60.485, backward_time=1.038, grad_norm=112.301, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.001e-05, train_time=2.769
+[gpub007:0/64] 2023-07-11 08:47:25,551 (trainer:732) INFO: 36epoch:train:7201-7300batch: iter_time=1.169e-04, forward_time=0.147, loss_ctc=73.404, loss_att=55.914, acc=0.706, loss=61.161, backward_time=1.029, grad_norm=114.249, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.000e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 08:49:41,263 (trainer:732) INFO: 36epoch:train:7301-7400batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=73.601, loss_att=55.992, acc=0.723, loss=61.275, backward_time=1.029, grad_norm=113.768, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=5.999e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 08:51:56,745 (trainer:732) INFO: 36epoch:train:7401-7500batch: iter_time=1.104e-04, forward_time=0.146, loss_ctc=74.974, loss_att=47.700, acc=0.719, loss=55.882, backward_time=1.027, grad_norm=120.438, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=5.998e-05, train_time=2.709
+[gpub007:0/64] 2023-07-11 08:51:59,462 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub007:0/64] 2023-07-11 08:52:17,396 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 08:52:20,832 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 08:52:20,832 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub007:0/64] 2023-07-11 08:52:20,838 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 08:58:46,396 (trainer:732) INFO: 36epoch:train:7501-7600batch: iter_time=1.294, forward_time=0.175, loss_ctc=75.220, loss_att=54.638, acc=0.725, loss=60.812, backward_time=1.040, grad_norm=124.573, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=5.997e-05, train_time=8.192
+[gpub007:0/64] 2023-07-11 09:01:03,359 (trainer:732) INFO: 36epoch:train:7601-7700batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=64.370, loss_att=52.964, acc=0.705, loss=56.386, backward_time=1.032, grad_norm=109.928, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=5.996e-05, train_time=2.739
+[gpub007:0/64] 2023-07-11 09:03:21,561 (trainer:732) INFO: 36epoch:train:7701-7800batch: iter_time=1.244e-04, forward_time=0.147, loss_ctc=78.666, loss_att=58.239, acc=0.712, loss=64.367, backward_time=1.031, grad_norm=123.469, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=5.995e-05, train_time=2.764
+[gpub007:0/64] 2023-07-11 09:05:37,601 (trainer:732) INFO: 36epoch:train:7801-7900batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=74.832, loss_att=53.817, acc=0.714, loss=60.122, backward_time=1.031, grad_norm=105.908, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=5.995e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 09:07:53,599 (trainer:732) INFO: 36epoch:train:7901-8000batch: iter_time=1.345e-04, forward_time=0.146, loss_ctc=79.369, loss_att=57.571, acc=0.714, loss=64.110, backward_time=1.031, grad_norm=110.450, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=5.994e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 09:10:09,362 (trainer:732) INFO: 36epoch:train:8001-8100batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=69.459, loss_att=51.312, acc=0.713, loss=56.756, backward_time=1.028, grad_norm=112.586, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.993e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 09:12:25,536 (trainer:732) INFO: 36epoch:train:8101-8200batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=73.031, loss_att=57.418, acc=0.718, loss=62.102, backward_time=1.029, grad_norm=137.616, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.992e-05, train_time=2.723
+[gpub007:0/64] 2023-07-11 09:14:41,301 (trainer:732) INFO: 36epoch:train:8201-8300batch: iter_time=1.259e-04, forward_time=0.147, loss_ctc=74.386, loss_att=51.113, acc=0.723, loss=58.095, backward_time=1.027, grad_norm=132.786, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.991e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 09:15:40,897 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub007:0/64] 2023-07-11 09:15:59,079 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 09:16:02,817 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 09:16:02,818 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-11 09:16:02,824 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 09:20:23,879 (trainer:732) INFO: 36epoch:train:8301-8400batch: iter_time=1.948, forward_time=0.148, loss_ctc=77.413, loss_att=53.279, acc=0.714, loss=60.519, backward_time=1.046, grad_norm=114.993, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.990e-05, train_time=6.851
+[gpub007:0/64] 2023-07-11 09:22:40,474 (trainer:732) INFO: 36epoch:train:8401-8500batch: iter_time=1.305e-04, forward_time=0.145, loss_ctc=67.555, loss_att=53.104, acc=0.710, loss=57.439, backward_time=1.029, grad_norm=107.182, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.989e-05, train_time=2.732
+[gpub007:0/64] 2023-07-11 09:24:56,628 (trainer:732) INFO: 36epoch:train:8501-8600batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=74.579, loss_att=53.348, acc=0.704, loss=59.718, backward_time=1.030, grad_norm=141.022, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.989e-05, train_time=2.723
+[gpub007:0/64] 2023-07-11 09:27:13,261 (trainer:732) INFO: 36epoch:train:8601-8700batch: iter_time=1.146e-04, forward_time=0.147, loss_ctc=76.259, loss_att=58.412, acc=0.705, loss=63.766, backward_time=1.031, grad_norm=118.202, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.988e-05, train_time=2.732
+[gpub007:0/64] 2023-07-11 09:29:29,625 (trainer:732) INFO: 36epoch:train:8701-8800batch: iter_time=1.134e-04, forward_time=0.145, loss_ctc=74.279, loss_att=56.323, acc=0.706, loss=61.710, backward_time=1.032, grad_norm=102.642, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.987e-05, train_time=2.727
+[gpub007:0/64] 2023-07-11 09:31:45,648 (trainer:732) INFO: 36epoch:train:8801-8900batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=73.739, loss_att=51.773, acc=0.708, loss=58.363, backward_time=1.027, grad_norm=105.295, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.986e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 09:34:03,701 (trainer:732) INFO: 36epoch:train:8901-9000batch: iter_time=1.083e-04, forward_time=0.145, loss_ctc=75.303, loss_att=59.854, acc=0.698, loss=64.489, backward_time=1.031, grad_norm=127.216, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.985e-05, train_time=2.761
+[gpub007:0/64] 2023-07-11 09:36:22,691 (trainer:732) INFO: 36epoch:train:9001-9100batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=72.461, loss_att=51.721, acc=0.723, loss=57.943, backward_time=1.032, grad_norm=101.159, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.984e-05, train_time=2.780
+[gpub007:0/64] 2023-07-11 09:37:55,959 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub007:0/64] 2023-07-11 09:38:14,194 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 09:38:17,890 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 09:38:17,890 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub007:0/64] 2023-07-11 09:38:17,896 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 09:42:47,670 (trainer:732) INFO: 36epoch:train:9101-9200batch: iter_time=1.279, forward_time=0.146, loss_ctc=77.700, loss_att=53.951, acc=0.712, loss=61.076, backward_time=1.039, grad_norm=125.494, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.983e-05, train_time=7.699
+[gpub007:0/64] 2023-07-11 09:45:03,895 (trainer:732) INFO: 36epoch:train:9201-9300batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=67.907, loss_att=47.197, acc=0.724, loss=53.410, backward_time=1.030, grad_norm=103.631, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.983e-05, train_time=2.724
+[gpub007:0/64] 2023-07-11 09:47:21,117 (trainer:732) INFO: 36epoch:train:9301-9400batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=75.412, loss_att=58.609, acc=0.698, loss=63.650, backward_time=1.030, grad_norm=120.995, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.982e-05, train_time=2.744
+[gpub007:0/64] 2023-07-11 09:49:36,984 (trainer:732) INFO: 36epoch:train:9401-9500batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=74.342, loss_att=54.732, acc=0.709, loss=60.615, backward_time=1.028, grad_norm=108.000, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.981e-05, train_time=2.717
+[gpub007:0/64] 2023-07-11 09:51:53,225 (trainer:732) INFO: 36epoch:train:9501-9600batch: iter_time=1.300e-04, forward_time=0.146, loss_ctc=74.148, loss_att=54.822, acc=0.706, loss=60.620, backward_time=1.027, grad_norm=115.651, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.980e-05, train_time=2.725
+[gpub007:0/64] 2023-07-11 09:54:09,043 (trainer:732) INFO: 36epoch:train:9601-9700batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=74.275, loss_att=53.389, acc=0.713, loss=59.655, backward_time=1.026, grad_norm=129.263, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.979e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 09:56:24,844 (trainer:732) INFO: 36epoch:train:9701-9800batch: iter_time=1.189e-04, forward_time=0.146, loss_ctc=74.105, loss_att=55.939, acc=0.695, loss=61.388, backward_time=1.027, grad_norm=127.869, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.978e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 09:58:40,793 (trainer:732) INFO: 36epoch:train:9801-9900batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=71.580, loss_att=55.299, acc=0.723, loss=60.184, backward_time=1.029, grad_norm=134.775, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.977e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 10:00:56,221 (trainer:732) INFO: 36epoch:train:9901-10000batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=73.729, loss_att=45.704, acc=0.728, loss=54.112, backward_time=1.025, grad_norm=134.085, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.977e-05, train_time=2.708
+[gpub007:0/64] 2023-07-11 10:13:19,578 (trainer:338) INFO: 36epoch results: [train] iter_time=0.164, forward_time=0.147, loss_ctc=74.624, loss_att=54.788, acc=0.709, loss=60.739, backward_time=1.030, grad_norm=121.600, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.019e-05, train_time=3.275, time=4 hours, 33 minutes and 8.28 seconds, total_count=330000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=45.403, cer_ctc=0.261, loss_att=40.942, acc=0.659, cer=0.438, wer=1.000, loss=42.280, time=6 minutes and 16.69 seconds, total_count=33902, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 56.42 seconds, total_count=0, gpu_max_cached_mem_GB=37.219
+[gpub007:0/64] 2023-07-11 10:13:35,156 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub007:0/64] 2023-07-11 10:13:35,262 (trainer:272) INFO: 37/50epoch started. Estimated time to finish: 2 days, 19 hours and 36 minutes
+[gpub007:0/64] 2023-07-11 10:13:35,265 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub007:0/64] 2023-07-11 10:13:53,068 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 10:13:56,420 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 10:13:56,420 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub007:0/64] 2023-07-11 10:13:56,426 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 10:18:04,588 (trainer:732) INFO: 37epoch:train:1-100batch: iter_time=1.275, forward_time=0.148, loss_ctc=73.396, loss_att=59.771, acc=0.684, loss=63.858, backward_time=1.041, grad_norm=127.507, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.976e-05, train_time=5.386
+[gpub007:0/64] 2023-07-11 10:20:41,737 (trainer:732) INFO: 37epoch:train:101-200batch: iter_time=4.899e-04, forward_time=0.313, loss_ctc=79.529, loss_att=56.868, acc=0.699, loss=63.667, backward_time=1.051, grad_norm=117.632, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.188, optim0_lr0=5.975e-05, train_time=3.143
+[gpub007:0/64] 2023-07-11 10:22:59,271 (trainer:732) INFO: 37epoch:train:201-300batch: iter_time=1.262e-04, forward_time=0.146, loss_ctc=69.740, loss_att=52.186, acc=0.698, loss=57.452, backward_time=1.026, grad_norm=104.660, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.974e-05, train_time=2.750
+[gpub007:0/64] 2023-07-11 10:25:16,426 (trainer:732) INFO: 37epoch:train:301-400batch: iter_time=1.299e-04, forward_time=0.144, loss_ctc=72.292, loss_att=54.165, acc=0.684, loss=59.603, backward_time=1.025, grad_norm=125.518, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.973e-05, train_time=2.741
+[gpub007:0/64] 2023-07-11 10:27:34,463 (trainer:732) INFO: 37epoch:train:401-500batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=63.720, loss_att=46.522, acc=0.727, loss=51.681, backward_time=1.027, grad_norm=118.458, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.972e-05, train_time=2.762
+[gpub007:0/64] 2023-07-11 10:29:56,086 (trainer:732) INFO: 37epoch:train:501-600batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=69.319, loss_att=52.141, acc=0.697, loss=57.294, backward_time=1.056, grad_norm=122.864, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.971e-05, train_time=2.832
+[gpub007:0/64] 2023-07-11 10:32:21,236 (trainer:732) INFO: 37epoch:train:601-700batch: iter_time=1.347e-04, forward_time=0.146, loss_ctc=66.499, loss_att=47.009, acc=0.715, loss=52.856, backward_time=1.042, grad_norm=121.537, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.971e-05, train_time=2.903
+[gpub007:0/64] 2023-07-11 10:34:49,561 (trainer:732) INFO: 37epoch:train:701-800batch: iter_time=1.320e-04, forward_time=0.145, loss_ctc=68.531, loss_att=53.636, acc=0.701, loss=58.104, backward_time=1.042, grad_norm=119.565, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.970e-05, train_time=2.966
+[gpub007:0/64] 2023-07-11 10:35:48,543 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub007:0/64] 2023-07-11 10:36:05,996 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 10:36:09,370 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 10:36:09,370 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub007:0/64] 2023-07-11 10:36:09,376 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 10:41:12,024 (trainer:732) INFO: 37epoch:train:801-900batch: iter_time=1.591, forward_time=0.146, loss_ctc=68.487, loss_att=51.202, acc=0.698, loss=56.387, backward_time=1.051, grad_norm=122.913, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.969e-05, train_time=7.649
+[gpub007:0/64] 2023-07-11 10:43:29,382 (trainer:732) INFO: 37epoch:train:901-1000batch: iter_time=1.321e-04, forward_time=0.148, loss_ctc=77.861, loss_att=60.035, acc=0.704, loss=65.383, backward_time=1.033, grad_norm=121.738, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.968e-05, train_time=2.747
+[gpub007:0/64] 2023-07-11 10:45:45,646 (trainer:732) INFO: 37epoch:train:1001-1100batch: iter_time=1.477e-04, forward_time=0.147, loss_ctc=70.750, loss_att=50.708, acc=0.712, loss=56.720, backward_time=1.030, grad_norm=104.533, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.967e-05, train_time=2.725
+[gpub007:0/64] 2023-07-11 10:48:01,721 (trainer:732) INFO: 37epoch:train:1101-1200batch: iter_time=1.313e-04, forward_time=0.147, loss_ctc=74.678, loss_att=52.029, acc=0.709, loss=58.824, backward_time=1.029, grad_norm=112.328, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.966e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 10:50:17,245 (trainer:732) INFO: 37epoch:train:1201-1300batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=66.879, loss_att=49.112, acc=0.706, loss=54.442, backward_time=1.027, grad_norm=114.777, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.965e-05, train_time=2.710
+[gpub007:0/64] 2023-07-11 10:52:33,064 (trainer:732) INFO: 37epoch:train:1301-1400batch: iter_time=1.189e-04, forward_time=0.146, loss_ctc=64.111, loss_att=46.411, acc=0.732, loss=51.721, backward_time=1.029, grad_norm=109.227, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.965e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 10:54:49,009 (trainer:732) INFO: 37epoch:train:1401-1500batch: iter_time=1.101e-04, forward_time=0.145, loss_ctc=72.953, loss_att=53.605, acc=0.719, loss=59.410, backward_time=1.029, grad_norm=125.380, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.964e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 10:57:04,671 (trainer:732) INFO: 37epoch:train:1501-1600batch: iter_time=1.076e-04, forward_time=0.145, loss_ctc=63.215, loss_att=47.727, acc=0.712, loss=52.373, backward_time=1.027, grad_norm=122.014, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.963e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 10:58:36,988 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub007:0/64] 2023-07-11 10:58:55,473 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 10:58:58,896 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 10:58:58,896 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub007:0/64] 2023-07-11 10:58:58,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 11:03:02,868 (trainer:732) INFO: 37epoch:train:1601-1700batch: iter_time=1.300, forward_time=0.147, loss_ctc=68.034, loss_att=50.001, acc=0.717, loss=55.411, backward_time=1.037, grad_norm=108.329, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.962e-05, train_time=7.164
+[gpub007:0/64] 2023-07-11 11:05:19,465 (trainer:732) INFO: 37epoch:train:1701-1800batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=74.537, loss_att=60.118, acc=0.683, loss=64.444, backward_time=1.030, grad_norm=127.510, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.961e-05, train_time=2.732
+[gpub007:0/64] 2023-07-11 11:07:35,430 (trainer:732) INFO: 37epoch:train:1801-1900batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=74.236, loss_att=51.703, acc=0.713, loss=58.463, backward_time=1.027, grad_norm=148.643, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.960e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 11:09:51,405 (trainer:732) INFO: 37epoch:train:1901-2000batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=69.162, loss_att=54.264, acc=0.695, loss=58.734, backward_time=1.028, grad_norm=109.083, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.960e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 11:12:07,317 (trainer:732) INFO: 37epoch:train:2001-2100batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=69.839, loss_att=51.446, acc=0.698, loss=56.964, backward_time=1.029, grad_norm=123.054, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.959e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 11:14:22,887 (trainer:732) INFO: 37epoch:train:2101-2200batch: iter_time=1.290e-04, forward_time=0.145, loss_ctc=65.376, loss_att=47.470, acc=0.727, loss=52.842, backward_time=1.027, grad_norm=98.798, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.958e-05, train_time=2.711
+[gpub007:0/64] 2023-07-11 11:16:38,522 (trainer:732) INFO: 37epoch:train:2201-2300batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=67.293, loss_att=50.480, acc=0.704, loss=55.524, backward_time=1.027, grad_norm=120.763, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.957e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 11:18:54,131 (trainer:732) INFO: 37epoch:train:2301-2400batch: iter_time=1.193e-04, forward_time=0.145, loss_ctc=66.950, loss_att=48.792, acc=0.716, loss=54.240, backward_time=1.026, grad_norm=115.115, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.956e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 11:21:09,741 (trainer:732) INFO: 37epoch:train:2401-2500batch: iter_time=1.190e-04, forward_time=0.146, loss_ctc=65.267, loss_att=51.255, acc=0.708, loss=55.459, backward_time=1.028, grad_norm=143.003, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.955e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 11:21:11,290 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub007:0/64] 2023-07-11 11:21:29,387 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 11:21:32,829 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 11:21:32,829 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub007:0/64] 2023-07-11 11:21:32,835 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 11:27:41,279 (trainer:732) INFO: 37epoch:train:2501-2600batch: iter_time=1.271, forward_time=0.147, loss_ctc=74.512, loss_att=56.767, acc=0.713, loss=62.090, backward_time=1.045, grad_norm=114.692, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.954e-05, train_time=7.831
+[gpub007:0/64] 2023-07-11 11:29:57,752 (trainer:732) INFO: 37epoch:train:2601-2700batch: iter_time=1.219e-04, forward_time=0.147, loss_ctc=74.458, loss_att=51.708, acc=0.717, loss=58.533, backward_time=1.029, grad_norm=117.615, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.954e-05, train_time=2.729
+[gpub007:0/64] 2023-07-11 11:32:13,486 (trainer:732) INFO: 37epoch:train:2701-2800batch: iter_time=1.187e-04, forward_time=0.147, loss_ctc=70.284, loss_att=52.794, acc=0.709, loss=58.041, backward_time=1.028, grad_norm=121.598, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.953e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 11:34:29,076 (trainer:732) INFO: 37epoch:train:2801-2900batch: iter_time=1.256e-04, forward_time=0.146, loss_ctc=73.517, loss_att=52.961, acc=0.704, loss=59.128, backward_time=1.027, grad_norm=124.741, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.952e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 11:36:44,873 (trainer:732) INFO: 37epoch:train:2901-3000batch: iter_time=1.128e-04, forward_time=0.147, loss_ctc=62.621, loss_att=46.877, acc=0.726, loss=51.600, backward_time=1.028, grad_norm=106.802, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.951e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 11:39:11,472 (trainer:732) INFO: 37epoch:train:3001-3100batch: iter_time=1.164e-04, forward_time=0.147, loss_ctc=65.661, loss_att=48.617, acc=0.723, loss=53.730, backward_time=1.043, grad_norm=107.600, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.950e-05, train_time=2.932
+[gpub007:0/64] 2023-07-11 11:41:29,942 (trainer:732) INFO: 37epoch:train:3101-3200batch: iter_time=1.193e-04, forward_time=0.171, loss_ctc=66.728, loss_att=47.652, acc=0.723, loss=53.375, backward_time=1.029, grad_norm=108.349, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.183, optim0_lr0=5.949e-05, train_time=2.769
+[gpub007:0/64] 2023-07-11 11:43:51,934 (trainer:732) INFO: 37epoch:train:3201-3300batch: iter_time=1.278e-04, forward_time=0.147, loss_ctc=66.736, loss_att=50.802, acc=0.718, loss=55.582, backward_time=1.032, grad_norm=117.782, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.949e-05, train_time=2.840
+[gpub007:0/64] 2023-07-11 11:44:38,810 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub007:0/64] 2023-07-11 11:44:56,441 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 11:44:59,880 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 11:44:59,880 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub007:0/64] 2023-07-11 11:44:59,886 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 11:49:53,465 (trainer:732) INFO: 37epoch:train:3301-3400batch: iter_time=1.328, forward_time=0.213, loss_ctc=71.037, loss_att=56.389, acc=0.703, loss=60.783, backward_time=1.047, grad_norm=112.623, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.187, optim0_lr0=5.948e-05, train_time=7.230
+[gpub007:0/64] 2023-07-11 11:52:09,588 (trainer:732) INFO: 37epoch:train:3401-3500batch: iter_time=1.053e-04, forward_time=0.145, loss_ctc=75.136, loss_att=53.435, acc=0.710, loss=59.945, backward_time=1.028, grad_norm=107.148, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.947e-05, train_time=2.722
+[gpub007:0/64] 2023-07-11 11:54:25,665 (trainer:732) INFO: 37epoch:train:3501-3600batch: iter_time=1.056e-04, forward_time=0.145, loss_ctc=70.639, loss_att=52.362, acc=0.706, loss=57.845, backward_time=1.028, grad_norm=126.243, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.946e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 11:56:41,580 (trainer:732) INFO: 37epoch:train:3601-3700batch: iter_time=1.267e-04, forward_time=0.145, loss_ctc=73.037, loss_att=52.505, acc=0.709, loss=58.665, backward_time=1.027, grad_norm=134.571, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.945e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 11:58:57,135 (trainer:732) INFO: 37epoch:train:3701-3800batch: iter_time=1.266e-04, forward_time=0.145, loss_ctc=62.880, loss_att=47.302, acc=0.721, loss=51.975, backward_time=1.027, grad_norm=95.988, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.944e-05, train_time=2.711
+[gpub007:0/64] 2023-07-11 12:01:12,873 (trainer:732) INFO: 37epoch:train:3801-3900batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=67.521, loss_att=49.809, acc=0.723, loss=55.123, backward_time=1.028, grad_norm=130.044, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.944e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 12:03:28,186 (trainer:732) INFO: 37epoch:train:3901-4000batch: iter_time=1.065e-04, forward_time=0.143, loss_ctc=65.221, loss_att=46.138, acc=0.727, loss=51.863, backward_time=1.023, grad_norm=99.350, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.181, optim0_lr0=5.943e-05, train_time=2.706
+[gpub007:0/64] 2023-07-11 12:05:44,173 (trainer:732) INFO: 37epoch:train:4001-4100batch: iter_time=1.056e-04, forward_time=0.145, loss_ctc=68.017, loss_att=51.912, acc=0.717, loss=56.744, backward_time=1.029, grad_norm=120.104, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.181, optim0_lr0=5.942e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 12:07:23,657 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub007:0/64] 2023-07-11 12:07:41,458 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 12:07:44,857 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 12:07:44,858 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub007:0/64] 2023-07-11 12:07:44,864 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 12:11:58,637 (trainer:732) INFO: 37epoch:train:4101-4200batch: iter_time=1.304, forward_time=0.146, loss_ctc=66.351, loss_att=47.177, acc=0.724, loss=52.929, backward_time=1.106, grad_norm=98.938, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.941e-05, train_time=7.489
+[gpub007:0/64] 2023-07-11 12:14:15,395 (trainer:732) INFO: 37epoch:train:4201-4300batch: iter_time=1.148e-04, forward_time=0.145, loss_ctc=74.530, loss_att=57.084, acc=0.705, loss=62.318, backward_time=1.030, grad_norm=121.127, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.940e-05, train_time=2.735
+[gpub007:0/64] 2023-07-11 12:16:31,965 (trainer:732) INFO: 37epoch:train:4301-4400batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=74.338, loss_att=51.001, acc=0.718, loss=58.002, backward_time=1.033, grad_norm=122.361, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.939e-05, train_time=2.731
+[gpub007:0/64] 2023-07-11 12:18:48,042 (trainer:732) INFO: 37epoch:train:4401-4500batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=68.798, loss_att=52.278, acc=0.710, loss=57.234, backward_time=1.029, grad_norm=130.252, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.938e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 12:21:03,702 (trainer:732) INFO: 37epoch:train:4501-4600batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=68.909, loss_att=51.637, acc=0.704, loss=56.819, backward_time=1.027, grad_norm=137.823, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.938e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 12:23:19,226 (trainer:732) INFO: 37epoch:train:4601-4700batch: iter_time=1.181e-04, forward_time=0.144, loss_ctc=65.441, loss_att=46.367, acc=0.736, loss=52.090, backward_time=1.026, grad_norm=93.332, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.937e-05, train_time=2.710
+[gpub007:0/64] 2023-07-11 12:25:34,979 (trainer:732) INFO: 37epoch:train:4701-4800batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=67.536, loss_att=49.512, acc=0.722, loss=54.919, backward_time=1.028, grad_norm=104.177, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.936e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 12:27:50,659 (trainer:732) INFO: 37epoch:train:4801-4900batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=66.204, loss_att=48.003, acc=0.721, loss=53.464, backward_time=1.028, grad_norm=117.366, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.935e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 12:30:06,421 (trainer:732) INFO: 37epoch:train:4901-5000batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=66.198, loss_att=51.812, acc=0.714, loss=56.128, backward_time=1.027, grad_norm=96.135, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.934e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 12:30:07,967 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub007:0/64] 2023-07-11 12:30:26,550 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 12:30:30,016 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 12:30:30,016 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub007:0/64] 2023-07-11 12:30:30,022 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 12:37:05,336 (trainer:732) INFO: 37epoch:train:5001-5100batch: iter_time=1.338, forward_time=0.146, loss_ctc=72.943, loss_att=57.395, acc=0.704, loss=62.059, backward_time=1.048, grad_norm=113.638, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.181, optim0_lr0=5.933e-05, train_time=8.378
+[gpub007:0/64] 2023-07-11 12:39:21,678 (trainer:732) INFO: 37epoch:train:5101-5200batch: iter_time=1.062e-04, forward_time=0.145, loss_ctc=72.681, loss_att=52.867, acc=0.715, loss=58.811, backward_time=1.028, grad_norm=128.302, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.181, optim0_lr0=5.933e-05, train_time=2.727
+[gpub007:0/64] 2023-07-11 12:41:48,738 (trainer:732) INFO: 37epoch:train:5201-5300batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=69.373, loss_att=54.118, acc=0.697, loss=58.695, backward_time=1.087, grad_norm=117.537, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.932e-05, train_time=2.941
+[gpub007:0/64] 2023-07-11 12:44:08,249 (trainer:732) INFO: 37epoch:train:5301-5400batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=73.062, loss_att=52.696, acc=0.703, loss=58.806, backward_time=1.034, grad_norm=120.355, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.931e-05, train_time=2.790
+[gpub007:0/64] 2023-07-11 12:46:23,958 (trainer:732) INFO: 37epoch:train:5401-5500batch: iter_time=1.420e-04, forward_time=0.145, loss_ctc=62.483, loss_att=47.374, acc=0.719, loss=51.907, backward_time=1.027, grad_norm=98.551, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.930e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 12:48:39,675 (trainer:732) INFO: 37epoch:train:5501-5600batch: iter_time=1.315e-04, forward_time=0.146, loss_ctc=65.916, loss_att=50.161, acc=0.712, loss=54.887, backward_time=1.027, grad_norm=100.155, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.929e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 12:50:54,988 (trainer:732) INFO: 37epoch:train:5601-5700batch: iter_time=1.339e-04, forward_time=0.145, loss_ctc=66.600, loss_att=47.484, acc=0.721, loss=53.219, backward_time=1.025, grad_norm=107.539, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.928e-05, train_time=2.706
+[gpub007:0/64] 2023-07-11 12:53:11,049 (trainer:732) INFO: 37epoch:train:5701-5800batch: iter_time=1.475e-04, forward_time=0.146, loss_ctc=66.766, loss_att=49.900, acc=0.718, loss=54.960, backward_time=1.031, grad_norm=110.353, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.928e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 12:53:57,706 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub007:0/64] 2023-07-11 12:54:15,837 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 12:54:19,255 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 12:54:19,255 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-11 12:54:19,261 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 12:58:52,511 (trainer:732) INFO: 37epoch:train:5801-5900batch: iter_time=1.304, forward_time=0.147, loss_ctc=69.520, loss_att=53.386, acc=0.699, loss=58.227, backward_time=1.040, grad_norm=113.201, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.927e-05, train_time=6.829
+[gpub007:0/64] 2023-07-11 13:01:09,640 (trainer:732) INFO: 37epoch:train:5901-6000batch: iter_time=1.520e-04, forward_time=0.146, loss_ctc=76.000, loss_att=58.654, acc=0.707, loss=63.858, backward_time=1.032, grad_norm=114.478, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.926e-05, train_time=2.742
+[gpub007:0/64] 2023-07-11 13:03:25,191 (trainer:732) INFO: 37epoch:train:6001-6100batch: iter_time=1.551e-04, forward_time=0.145, loss_ctc=70.812, loss_att=52.626, acc=0.705, loss=58.081, backward_time=1.027, grad_norm=112.754, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.925e-05, train_time=2.711
+[gpub007:0/64] 2023-07-11 13:05:41,220 (trainer:732) INFO: 37epoch:train:6101-6200batch: iter_time=1.743e-04, forward_time=0.147, loss_ctc=72.313, loss_att=50.371, acc=0.709, loss=56.954, backward_time=1.030, grad_norm=117.075, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.924e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 13:07:57,111 (trainer:732) INFO: 37epoch:train:6201-6300batch: iter_time=1.548e-04, forward_time=0.147, loss_ctc=63.890, loss_att=47.404, acc=0.710, loss=52.350, backward_time=1.030, grad_norm=103.617, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.923e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 13:10:12,706 (trainer:732) INFO: 37epoch:train:6301-6400batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=65.401, loss_att=48.200, acc=0.729, loss=53.360, backward_time=1.027, grad_norm=118.256, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.923e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 13:12:28,722 (trainer:732) INFO: 37epoch:train:6401-6500batch: iter_time=1.029e-04, forward_time=0.145, loss_ctc=69.854, loss_att=53.100, acc=0.710, loss=58.126, backward_time=1.028, grad_norm=117.987, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.181, optim0_lr0=5.922e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 13:14:44,493 (trainer:732) INFO: 37epoch:train:6501-6600batch: iter_time=1.153e-04, forward_time=0.146, loss_ctc=64.042, loss_att=46.908, acc=0.722, loss=52.048, backward_time=1.027, grad_norm=115.001, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.921e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 13:16:18,783 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub007:0/64] 2023-07-11 13:16:37,030 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 13:16:40,470 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 13:16:40,470 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub007:0/64] 2023-07-11 13:16:40,476 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 13:21:07,299 (trainer:732) INFO: 37epoch:train:6601-6700batch: iter_time=1.318, forward_time=0.145, loss_ctc=67.741, loss_att=48.355, acc=0.718, loss=54.171, backward_time=1.039, grad_norm=109.415, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.920e-05, train_time=7.656
+[gpub007:0/64] 2023-07-11 13:23:28,242 (trainer:732) INFO: 37epoch:train:6701-6800batch: iter_time=1.225e-04, forward_time=0.147, loss_ctc=72.885, loss_att=57.769, acc=0.697, loss=62.304, backward_time=1.039, grad_norm=111.526, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.919e-05, train_time=2.819
+[gpub007:0/64] 2023-07-11 13:25:56,628 (trainer:732) INFO: 37epoch:train:6801-6900batch: iter_time=2.967e-04, forward_time=0.166, loss_ctc=76.117, loss_att=52.490, acc=0.713, loss=59.578, backward_time=1.050, grad_norm=103.003, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.918e-05, train_time=2.967
+[gpub007:0/64] 2023-07-11 13:28:28,303 (trainer:732) INFO: 37epoch:train:6901-7000batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=68.149, loss_att=52.988, acc=0.705, loss=57.537, backward_time=1.046, grad_norm=105.892, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.918e-05, train_time=3.033
+[gpub007:0/64] 2023-07-11 13:30:46,658 (trainer:732) INFO: 37epoch:train:7001-7100batch: iter_time=1.170e-04, forward_time=0.149, loss_ctc=68.961, loss_att=52.298, acc=0.701, loss=57.297, backward_time=1.032, grad_norm=110.890, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.917e-05, train_time=2.767
+[gpub007:0/64] 2023-07-11 13:33:03,183 (trainer:732) INFO: 37epoch:train:7101-7200batch: iter_time=1.397e-04, forward_time=0.145, loss_ctc=63.978, loss_att=47.625, acc=0.728, loss=52.531, backward_time=1.028, grad_norm=115.364, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.916e-05, train_time=2.730
+[gpub007:0/64] 2023-07-11 13:35:33,789 (trainer:732) INFO: 37epoch:train:7201-7300batch: iter_time=1.228e-04, forward_time=0.154, loss_ctc=68.812, loss_att=51.038, acc=0.706, loss=56.370, backward_time=1.048, grad_norm=126.657, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.183, optim0_lr0=5.915e-05, train_time=3.012
+[gpub007:0/64] 2023-07-11 13:37:49,663 (trainer:732) INFO: 37epoch:train:7301-7400batch: iter_time=1.305e-04, forward_time=0.146, loss_ctc=64.371, loss_att=46.270, acc=0.726, loss=51.700, backward_time=1.029, grad_norm=103.992, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.914e-05, train_time=2.717
+[gpub007:0/64] 2023-07-11 13:40:12,866 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub007:0/64] 2023-07-11 13:40:30,561 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 13:40:34,240 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 13:40:34,241 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub007:0/64] 2023-07-11 13:40:34,247 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 13:43:52,044 (trainer:732) INFO: 37epoch:train:7401-7500batch: iter_time=1.627, forward_time=0.172, loss_ctc=64.992, loss_att=50.931, acc=0.714, loss=55.150, backward_time=1.035, grad_norm=108.730, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.183, optim0_lr0=5.914e-05, train_time=7.245
+[gpub007:0/64] 2023-07-11 13:46:11,448 (trainer:732) INFO: 37epoch:train:7501-7600batch: iter_time=1.250e-04, forward_time=0.147, loss_ctc=72.067, loss_att=55.669, acc=0.704, loss=60.588, backward_time=1.041, grad_norm=122.676, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.913e-05, train_time=2.790
+[gpub007:0/64] 2023-07-11 13:48:27,455 (trainer:732) INFO: 37epoch:train:7601-7700batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=76.923, loss_att=52.967, acc=0.716, loss=60.154, backward_time=1.028, grad_norm=120.730, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.912e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 13:50:43,068 (trainer:732) INFO: 37epoch:train:7701-7800batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=68.362, loss_att=52.743, acc=0.709, loss=57.429, backward_time=1.026, grad_norm=101.121, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.911e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 13:52:58,971 (trainer:732) INFO: 37epoch:train:7801-7900batch: iter_time=1.327e-04, forward_time=0.146, loss_ctc=70.113, loss_att=53.594, acc=0.703, loss=58.550, backward_time=1.029, grad_norm=129.743, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.910e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 13:55:14,656 (trainer:732) INFO: 37epoch:train:7901-8000batch: iter_time=1.337e-04, forward_time=0.145, loss_ctc=62.823, loss_att=45.049, acc=0.737, loss=50.381, backward_time=1.028, grad_norm=119.916, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.909e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 13:57:30,308 (trainer:732) INFO: 37epoch:train:8001-8100batch: iter_time=1.168e-04, forward_time=0.145, loss_ctc=70.501, loss_att=52.455, acc=0.713, loss=57.869, backward_time=1.028, grad_norm=98.464, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.909e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 13:59:46,046 (trainer:732) INFO: 37epoch:train:8101-8200batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=63.191, loss_att=46.242, acc=0.722, loss=51.327, backward_time=1.028, grad_norm=102.264, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.908e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 14:02:02,040 (trainer:732) INFO: 37epoch:train:8201-8300batch: iter_time=1.025e-04, forward_time=0.146, loss_ctc=66.031, loss_att=51.173, acc=0.716, loss=55.631, backward_time=1.030, grad_norm=96.635, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.907e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 14:02:51,500 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub007:0/64] 2023-07-11 14:03:10,181 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 14:03:13,860 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 14:03:13,860 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-11 14:03:13,866 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 14:09:27,748 (trainer:732) INFO: 37epoch:train:8301-8400batch: iter_time=1.822, forward_time=0.199, loss_ctc=71.059, loss_att=55.503, acc=0.696, loss=60.170, backward_time=1.040, grad_norm=121.867, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.185, optim0_lr0=5.906e-05, train_time=8.914 +[gpub007:0/64] 2023-07-11 14:11:45,404 (trainer:732) INFO: 37epoch:train:8401-8500batch: iter_time=1.342e-04, forward_time=0.146, loss_ctc=74.470, loss_att=53.384, acc=0.713, loss=59.710, backward_time=1.030, grad_norm=123.956, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.905e-05, train_time=2.753 +[gpub007:0/64] 2023-07-11 14:14:01,744 (trainer:732) INFO: 37epoch:train:8501-8600batch: iter_time=1.273e-04, forward_time=0.147, loss_ctc=69.650, loss_att=54.100, acc=0.696, loss=58.765, backward_time=1.029, grad_norm=107.083, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.904e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 14:16:17,478 (trainer:732) INFO: 37epoch:train:8601-8700batch: iter_time=1.268e-04, forward_time=0.146, loss_ctc=71.456, loss_att=51.857, acc=0.706, loss=57.737, backward_time=1.029, grad_norm=111.129, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.904e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 14:18:33,020 (trainer:732) INFO: 37epoch:train:8701-8800batch: iter_time=1.248e-04, forward_time=0.144, loss_ctc=63.389, loss_att=47.118, acc=0.718, loss=51.999, backward_time=1.027, grad_norm=125.895, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.903e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 14:20:50,384 (trainer:732) INFO: 37epoch:train:8801-8900batch: iter_time=1.076e-04, forward_time=0.144, loss_ctc=67.778, loss_att=51.668, acc=0.706, loss=56.501, backward_time=1.028, grad_norm=110.082, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.902e-05, train_time=2.747 +[gpub007:0/64] 2023-07-11 14:23:06,644 (trainer:732) INFO: 37epoch:train:8901-9000batch: iter_time=1.167e-04, forward_time=0.144, loss_ctc=63.860, loss_att=45.761, acc=0.724, loss=51.191, backward_time=1.030, grad_norm=102.661, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.901e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 14:25:22,073 (trainer:732) INFO: 37epoch:train:9001-9100batch: iter_time=1.318e-04, forward_time=0.143, loss_ctc=67.755, loss_att=50.844, acc=0.717, loss=55.917, backward_time=1.025, grad_norm=114.657, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, 
optim0_lr0=5.900e-05, train_time=2.708 +[gpub007:0/64] 2023-07-11 14:27:14,523 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-11 14:27:32,936 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 14:27:36,432 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 14:27:36,432 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 14:27:36,438 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 14:31:34,585 (trainer:732) INFO: 37epoch:train:9101-9200batch: iter_time=2.226, forward_time=0.145, loss_ctc=66.487, loss_att=47.631, acc=0.719, loss=53.288, backward_time=1.042, grad_norm=100.918, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.900e-05, train_time=7.450 +[gpub007:0/64] 2023-07-11 14:33:51,375 (trainer:732) INFO: 37epoch:train:9201-9300batch: iter_time=1.135e-04, forward_time=0.147, loss_ctc=72.194, loss_att=55.790, acc=0.708, loss=60.711, backward_time=1.032, grad_norm=113.211, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.899e-05, train_time=2.736 +[gpub007:0/64] 2023-07-11 14:36:08,125 (trainer:732) INFO: 37epoch:train:9301-9400batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=73.933, loss_att=50.691, acc=0.724, loss=57.663, backward_time=1.030, grad_norm=106.909, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.898e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 14:38:24,281 (trainer:732) INFO: 37epoch:train:9401-9500batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=69.115, loss_att=52.228, acc=0.711, loss=57.294, backward_time=1.029, grad_norm=102.198, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.897e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 14:40:40,366 (trainer:732) INFO: 37epoch:train:9501-9600batch: iter_time=1.267e-04, forward_time=0.145, loss_ctc=69.005, loss_att=51.201, acc=0.709, loss=56.542, backward_time=1.028, grad_norm=110.687, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.896e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 14:42:56,170 (trainer:732) INFO: 37epoch:train:9601-9700batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=64.277, loss_att=45.290, acc=0.739, loss=50.986, backward_time=1.028, grad_norm=116.655, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.895e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 14:45:11,526 (trainer:732) INFO: 37epoch:train:9701-9800batch: iter_time=1.252e-04, forward_time=0.145, loss_ctc=67.331, loss_att=50.745, acc=0.714, loss=55.721, backward_time=1.026, grad_norm=109.035, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.895e-05, train_time=2.707 +[gpub007:0/64] 2023-07-11 14:47:27,391 (trainer:732) INFO: 37epoch:train:9801-9900batch: iter_time=1.027e-04, forward_time=0.145, loss_ctc=65.575, 
loss_att=47.666, acc=0.723, loss=53.039, backward_time=1.028, grad_norm=110.732, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.894e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 14:49:43,356 (trainer:732) INFO: 37epoch:train:9901-10000batch: iter_time=1.021e-04, forward_time=0.146, loss_ctc=65.215, loss_att=49.947, acc=0.722, loss=54.527, backward_time=1.028, grad_norm=101.913, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.893e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 15:04:48,088 (trainer:338) INFO: 37epoch results: [train] iter_time=0.177, forward_time=0.149, loss_ctc=68.992, loss_att=51.253, acc=0.712, loss=56.575, backward_time=1.033, grad_norm=114.371, clip=100.000, loss_scale=2.637e+31, optim_step_time=0.182, optim0_lr0=5.934e-05, train_time=3.313, time=4 hours, 36 minutes and 30.99 seconds, total_count=340000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=43.668, cer_ctc=0.256, loss_att=38.075, acc=0.679, cer=0.407, wer=0.998, loss=39.753, time=8 minutes and 38.59 seconds, total_count=34914, gpu_max_cached_mem_GB=37.219, [att_plot] time=6 minutes and 3.24 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-11 15:05:05,925 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-11 15:05:06,141 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/30epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/32epoch.pth +[gpub007:0/64] 2023-07-11 15:05:06,196 (trainer:272) INFO: 38/50epoch started. Estimated time to finish: 2 days, 14 hours and 49 minutes +[gpub007:0/64] 2023-07-11 15:05:06,412 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub007:0/64] 2023-07-11 15:05:26,268 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 15:05:31,014 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 15:05:31,041 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub007:0/64] 2023-07-11 15:05:31,621 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 15:16:30,217 (trainer:732) INFO: 38epoch:train:1-100batch: iter_time=5.379, forward_time=0.192, loss_ctc=70.747, loss_att=53.761, acc=0.692, loss=58.857, backward_time=1.049, grad_norm=119.443, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.188, optim0_lr0=5.892e-05, train_time=13.677 +[gpub007:0/64] 2023-07-11 15:18:45,992 (trainer:732) INFO: 38epoch:train:101-200batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=73.101, loss_att=53.060, acc=0.700, loss=59.073, backward_time=1.027, grad_norm=122.458, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.891e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 15:21:03,541 (trainer:732) INFO: 38epoch:train:201-300batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=76.026, loss_att=55.987, acc=0.709, loss=61.999, backward_time=1.028, grad_norm=112.894, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.890e-05, train_time=2.751 +[gpub007:0/64] 2023-07-11 15:23:19,228 (trainer:732) INFO: 38epoch:train:301-400batch: iter_time=1.281e-04, forward_time=0.145, loss_ctc=71.814, loss_att=55.645, acc=0.698, loss=60.496, backward_time=1.027, grad_norm=131.861, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.890e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 15:25:36,514 (trainer:732) INFO: 38epoch:train:401-500batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=70.354, loss_att=49.610, acc=0.704, loss=55.833, backward_time=1.027, grad_norm=118.136, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.889e-05, train_time=2.745 +[gpub007:0/64] 2023-07-11 15:27:51,727 (trainer:732) INFO: 38epoch:train:501-600batch: iter_time=1.200e-04, forward_time=0.143, loss_ctc=73.588, loss_att=50.672, acc=0.689, loss=57.547, backward_time=1.027, grad_norm=114.586, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.888e-05, train_time=2.704 +[gpub007:0/64] 2023-07-11 15:30:07,936 (trainer:732) INFO: 38epoch:train:601-700batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=72.102, loss_att=54.286, acc=0.690, loss=59.631, backward_time=1.031, grad_norm=134.955, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.887e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 15:32:43,553 (trainer:732) INFO: 38epoch:train:701-800batch: iter_time=1.331e-04, forward_time=0.146, loss_ctc=81.262, loss_att=66.322, acc=0.697, loss=70.804, backward_time=1.060, grad_norm=123.055, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.886e-05, 
train_time=3.112 +[gpub007:0/64] 2023-07-11 15:33:37,357 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-11 15:33:55,266 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 15:33:58,684 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 15:33:58,684 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-11 15:33:58,690 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 15:38:30,267 (trainer:732) INFO: 38epoch:train:801-900batch: iter_time=1.347, forward_time=0.146, loss_ctc=73.651, loss_att=55.154, acc=0.699, loss=60.703, backward_time=1.051, grad_norm=126.212, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.886e-05, train_time=6.934 +[gpub007:0/64] 2023-07-11 15:40:46,438 (trainer:732) INFO: 38epoch:train:901-1000batch: iter_time=1.174e-04, forward_time=0.144, loss_ctc=70.743, loss_att=51.540, acc=0.697, loss=57.301, backward_time=1.025, grad_norm=125.456, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.885e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 15:43:01,701 (trainer:732) INFO: 38epoch:train:1001-1100batch: iter_time=1.194e-04, forward_time=0.144, loss_ctc=73.721, loss_att=53.760, acc=0.706, loss=59.748, backward_time=1.024, grad_norm=112.246, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.884e-05, train_time=2.705 +[gpub007:0/64] 2023-07-11 15:45:17,505 (trainer:732) INFO: 38epoch:train:1101-1200batch: iter_time=1.207e-04, forward_time=0.146, loss_ctc=72.435, loss_att=52.704, acc=0.711, loss=58.623, backward_time=1.027, grad_norm=104.981, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.883e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 15:47:32,904 (trainer:732) INFO: 38epoch:train:1201-1300batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=75.158, loss_att=54.691, acc=0.700, loss=60.831, backward_time=1.025, grad_norm=122.617, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.882e-05, train_time=2.708 +[gpub007:0/64] 2023-07-11 15:49:48,147 (trainer:732) INFO: 38epoch:train:1301-1400batch: iter_time=1.153e-04, forward_time=0.144, loss_ctc=67.569, loss_att=46.503, acc=0.708, loss=52.823, backward_time=1.025, grad_norm=116.592, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.882e-05, train_time=2.705 +[gpub007:0/64] 2023-07-11 15:52:03,809 (trainer:732) INFO: 38epoch:train:1401-1500batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=74.796, loss_att=51.540, acc=0.693, loss=58.517, backward_time=1.027, grad_norm=110.585, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.881e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 15:54:19,335 (trainer:732) INFO: 38epoch:train:1501-1600batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=71.958, loss_att=61.097, acc=0.690, loss=64.355, 
backward_time=1.025, grad_norm=113.539, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.880e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 15:55:50,222 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-11 15:56:08,930 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 15:56:12,441 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 15:56:12,441 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-11 15:56:12,447 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 16:01:18,564 (trainer:732) INFO: 38epoch:train:1601-1700batch: iter_time=1.317, forward_time=0.200, loss_ctc=81.834, loss_att=63.994, acc=0.701, loss=69.346, backward_time=1.044, grad_norm=133.607, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=5.879e-05, train_time=8.384 +[gpub007:0/64] 2023-07-11 16:03:34,797 (trainer:732) INFO: 38epoch:train:1701-1800batch: iter_time=1.470e-04, forward_time=0.148, loss_ctc=67.402, loss_att=49.279, acc=0.700, loss=54.716, backward_time=1.031, grad_norm=110.671, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.878e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 16:05:50,444 (trainer:732) INFO: 38epoch:train:1801-1900batch: iter_time=9.531e-05, forward_time=0.144, loss_ctc=72.375, loss_att=53.089, acc=0.708, loss=58.875, backward_time=1.027, grad_norm=108.075, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.877e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 16:08:06,252 (trainer:732) INFO: 38epoch:train:1901-2000batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=73.375, loss_att=53.204, acc=0.705, loss=59.255, backward_time=1.029, grad_norm=123.638, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.877e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 16:10:22,103 (trainer:732) INFO: 38epoch:train:2001-2100batch: iter_time=1.034e-04, forward_time=0.145, loss_ctc=73.792, loss_att=53.598, acc=0.707, loss=59.656, backward_time=1.028, grad_norm=113.370, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.876e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 16:12:37,344 (trainer:732) INFO: 38epoch:train:2101-2200batch: iter_time=1.017e-04, forward_time=0.143, loss_ctc=68.777, loss_att=48.127, acc=0.707, loss=54.322, backward_time=1.024, grad_norm=103.500, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.875e-05, train_time=2.705 +[gpub007:0/64] 2023-07-11 16:14:52,913 (trainer:732) INFO: 38epoch:train:2201-2300batch: iter_time=1.120e-04, forward_time=0.144, loss_ctc=72.635, loss_att=50.736, acc=0.688, loss=57.305, backward_time=1.026, grad_norm=121.079, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.874e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 16:17:08,900 (trainer:732) INFO: 
38epoch:train:2301-2400batch: iter_time=1.095e-04, forward_time=0.145, loss_ctc=72.630, loss_att=59.796, acc=0.694, loss=63.646, backward_time=1.028, grad_norm=123.940, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.873e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 16:19:26,445 (trainer:732) INFO: 38epoch:train:2401-2500batch: iter_time=1.080e-04, forward_time=0.144, loss_ctc=78.558, loss_att=59.280, acc=0.703, loss=65.064, backward_time=1.027, grad_norm=152.682, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.873e-05, train_time=2.751 +[gpub007:0/64] 2023-07-11 16:19:43,113 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-11 16:20:01,000 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 16:20:04,423 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 16:20:04,423 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-11 16:20:04,430 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 16:26:52,786 (trainer:732) INFO: 38epoch:train:2501-2600batch: iter_time=3.006, forward_time=0.146, loss_ctc=70.412, loss_att=52.815, acc=0.696, loss=58.094, backward_time=1.045, grad_norm=111.045, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.872e-05, train_time=8.927 +[gpub007:0/64] 2023-07-11 16:29:08,531 (trainer:732) INFO: 38epoch:train:2601-2700batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=69.552, loss_att=50.822, acc=0.702, loss=56.441, backward_time=1.028, grad_norm=120.794, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.871e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 16:31:25,764 (trainer:732) INFO: 38epoch:train:2701-2800batch: iter_time=1.252e-04, forward_time=0.146, loss_ctc=73.867, loss_att=53.099, acc=0.716, loss=59.329, backward_time=1.028, grad_norm=175.599, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.870e-05, train_time=2.744 +[gpub007:0/64] 2023-07-11 16:33:41,726 (trainer:732) INFO: 38epoch:train:2801-2900batch: iter_time=1.229e-04, forward_time=0.146, loss_ctc=71.414, loss_att=54.946, acc=0.704, loss=59.886, backward_time=1.029, grad_norm=121.577, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.869e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 16:35:57,597 (trainer:732) INFO: 38epoch:train:2901-3000batch: iter_time=1.220e-04, forward_time=0.147, loss_ctc=69.942, loss_att=48.570, acc=0.711, loss=54.981, backward_time=1.029, grad_norm=123.931, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.869e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 16:38:48,643 (trainer:732) INFO: 38epoch:train:3001-3100batch: iter_time=1.234e-04, forward_time=0.145, loss_ctc=69.822, loss_att=48.300, acc=0.699, loss=54.757, backward_time=1.067, grad_norm=119.641, clip=100.000, 
loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.868e-05, train_time=3.421 +[gpub007:0/64] 2023-07-11 16:41:05,225 (trainer:732) INFO: 38epoch:train:3101-3200batch: iter_time=1.319e-04, forward_time=0.144, loss_ctc=72.589, loss_att=54.846, acc=0.691, loss=60.169, backward_time=1.030, grad_norm=114.906, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.867e-05, train_time=2.731 +[gpub007:0/64] 2023-07-11 16:43:21,196 (trainer:732) INFO: 38epoch:train:3201-3300batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=78.884, loss_att=63.286, acc=0.699, loss=67.966, backward_time=1.029, grad_norm=120.031, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.866e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 16:44:10,033 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub007:0/64] 2023-07-11 16:44:28,458 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 16:44:31,892 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 16:44:31,892 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-11 16:44:31,899 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 16:48:52,449 (trainer:732) INFO: 38epoch:train:3301-3400batch: iter_time=1.285, forward_time=0.145, loss_ctc=76.662, loss_att=58.886, acc=0.694, loss=64.219, backward_time=1.042, grad_norm=120.851, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.865e-05, train_time=6.625 +[gpub007:0/64] 2023-07-11 16:51:08,696 (trainer:732) INFO: 38epoch:train:3401-3500batch: iter_time=9.670e-05, forward_time=0.144, loss_ctc=68.685, loss_att=48.626, acc=0.711, loss=54.643, backward_time=1.029, grad_norm=120.011, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.864e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 16:53:24,624 (trainer:732) INFO: 38epoch:train:3501-3600batch: iter_time=9.487e-05, forward_time=0.145, loss_ctc=74.115, loss_att=53.109, acc=0.719, loss=59.411, backward_time=1.029, grad_norm=108.127, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.864e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 16:55:40,535 (trainer:732) INFO: 38epoch:train:3601-3700batch: iter_time=1.207e-04, forward_time=0.147, loss_ctc=69.858, loss_att=52.811, acc=0.704, loss=57.925, backward_time=1.028, grad_norm=111.325, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.863e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 16:57:55,982 (trainer:732) INFO: 38epoch:train:3701-3800batch: iter_time=1.540e-04, forward_time=0.146, loss_ctc=70.752, loss_att=49.487, acc=0.709, loss=55.867, backward_time=1.026, grad_norm=119.178, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.862e-05, train_time=2.709 +[gpub007:0/64] 2023-07-11 17:00:11,497 (trainer:732) INFO: 38epoch:train:3801-3900batch: iter_time=1.460e-04, 
forward_time=0.146, loss_ctc=70.453, loss_att=47.431, acc=0.698, loss=54.338, backward_time=1.026, grad_norm=109.875, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.861e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 17:02:27,401 (trainer:732) INFO: 38epoch:train:3901-4000batch: iter_time=1.432e-04, forward_time=0.146, loss_ctc=70.507, loss_att=53.180, acc=0.696, loss=58.378, backward_time=1.030, grad_norm=146.020, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.860e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 17:04:43,224 (trainer:732) INFO: 38epoch:train:4001-4100batch: iter_time=1.373e-04, forward_time=0.147, loss_ctc=78.805, loss_att=62.561, acc=0.704, loss=67.435, backward_time=1.029, grad_norm=131.328, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.860e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 17:06:15,512 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-11 17:06:33,400 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 17:06:36,806 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 17:06:36,806 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-11 17:06:36,812 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 17:10:47,271 (trainer:732) INFO: 38epoch:train:4101-4200batch: iter_time=1.230, forward_time=0.146, loss_ctc=72.417, loss_att=56.184, acc=0.708, loss=61.054, backward_time=1.044, grad_norm=118.253, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.859e-05, train_time=7.281 +[gpub007:0/64] 2023-07-11 17:17:30,106 (trainer:732) INFO: 38epoch:train:4201-4300batch: iter_time=2.531, forward_time=0.213, loss_ctc=68.753, loss_att=50.993, acc=0.709, loss=56.321, backward_time=1.049, grad_norm=121.788, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.858e-05, train_time=8.056 +[gpub007:0/64] 2023-07-11 17:19:49,743 (trainer:732) INFO: 38epoch:train:4301-4400batch: iter_time=1.240e-04, forward_time=0.145, loss_ctc=72.558, loss_att=53.576, acc=0.714, loss=59.270, backward_time=1.032, grad_norm=118.292, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.857e-05, train_time=2.793 +[gpub007:0/64] 2023-07-11 17:22:07,866 (trainer:732) INFO: 38epoch:train:4401-4500batch: iter_time=1.197e-04, forward_time=0.147, loss_ctc=71.112, loss_att=51.616, acc=0.719, loss=57.465, backward_time=1.031, grad_norm=112.490, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.856e-05, train_time=2.762 +[gpub007:0/64] 2023-07-11 17:24:29,383 (trainer:732) INFO: 38epoch:train:4501-4600batch: iter_time=1.195e-04, forward_time=0.147, loss_ctc=71.827, loss_att=53.009, acc=0.715, loss=58.654, backward_time=1.032, grad_norm=123.152, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.856e-05, 
train_time=2.830 +[gpub007:0/64] 2023-07-11 17:32:11,819 (trainer:732) INFO: 38epoch:train:4601-4700batch: iter_time=3.151, forward_time=0.208, loss_ctc=66.866, loss_att=46.398, acc=0.710, loss=52.539, backward_time=1.043, grad_norm=113.267, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.855e-05, train_time=9.248 +[gpub007:0/64] 2023-07-11 17:34:29,405 (trainer:732) INFO: 38epoch:train:4701-4800batch: iter_time=1.367e-04, forward_time=0.146, loss_ctc=73.675, loss_att=50.793, acc=0.699, loss=57.658, backward_time=1.035, grad_norm=118.717, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.854e-05, train_time=2.752 +[gpub007:0/64] 2023-07-11 17:36:45,640 (trainer:732) INFO: 38epoch:train:4801-4900batch: iter_time=1.438e-04, forward_time=0.147, loss_ctc=74.144, loss_att=61.531, acc=0.702, loss=65.315, backward_time=1.031, grad_norm=118.231, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.853e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 17:39:01,941 (trainer:732) INFO: 38epoch:train:4901-5000batch: iter_time=1.389e-04, forward_time=0.145, loss_ctc=76.127, loss_att=54.705, acc=0.723, loss=61.131, backward_time=1.028, grad_norm=141.808, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.852e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 17:39:22,673 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-11 17:39:40,627 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 17:39:44,068 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 17:39:44,068 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-11 17:39:44,075 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 17:45:51,254 (trainer:732) INFO: 38epoch:train:5001-5100batch: iter_time=2.554, forward_time=0.146, loss_ctc=69.742, loss_att=52.822, acc=0.708, loss=57.898, backward_time=1.048, grad_norm=120.019, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.852e-05, train_time=8.186 +[gpub007:0/64] 2023-07-11 17:48:07,477 (trainer:732) INFO: 38epoch:train:5101-5200batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=70.586, loss_att=51.568, acc=0.706, loss=57.273, backward_time=1.029, grad_norm=128.961, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.851e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 17:50:25,199 (trainer:732) INFO: 38epoch:train:5201-5300batch: iter_time=1.194e-04, forward_time=0.147, loss_ctc=73.336, loss_att=52.926, acc=0.726, loss=59.049, backward_time=1.034, grad_norm=122.483, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.850e-05, train_time=2.754 +[gpub007:0/64] 2023-07-11 17:52:47,787 (trainer:732) INFO: 38epoch:train:5301-5400batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=70.923, loss_att=53.524, acc=0.712, loss=58.744, 
backward_time=1.038, grad_norm=114.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.849e-05, train_time=2.852 +[gpub007:0/64] 2023-07-11 17:55:05,182 (trainer:732) INFO: 38epoch:train:5401-5500batch: iter_time=1.268e-04, forward_time=0.147, loss_ctc=69.702, loss_att=47.066, acc=0.721, loss=53.857, backward_time=1.032, grad_norm=119.223, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.848e-05, train_time=2.748 +[gpub007:0/64] 2023-07-11 17:57:25,026 (trainer:732) INFO: 38epoch:train:5501-5600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=68.687, loss_att=49.161, acc=0.704, loss=55.019, backward_time=1.039, grad_norm=124.182, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.848e-05, train_time=2.797 +[gpub007:0/64] 2023-07-11 17:59:40,767 (trainer:732) INFO: 38epoch:train:5601-5700batch: iter_time=1.311e-04, forward_time=0.145, loss_ctc=70.284, loss_att=51.678, acc=0.703, loss=57.260, backward_time=1.025, grad_norm=108.718, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.847e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 18:01:57,543 (trainer:732) INFO: 38epoch:train:5701-5800batch: iter_time=1.326e-04, forward_time=0.145, loss_ctc=75.984, loss_att=63.009, acc=0.712, loss=66.901, backward_time=1.031, grad_norm=123.887, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.846e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 18:02:45,064 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-11 18:03:03,156 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 18:03:06,586 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 18:03:06,586 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-11 18:03:06,592 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 18:07:19,305 (trainer:732) INFO: 38epoch:train:5801-5900batch: iter_time=1.235, forward_time=0.147, loss_ctc=73.776, loss_att=54.691, acc=0.708, loss=60.417, backward_time=1.043, grad_norm=136.738, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.845e-05, train_time=6.435 +[gpub007:0/64] 2023-07-11 18:09:36,187 (trainer:732) INFO: 38epoch:train:5901-6000batch: iter_time=1.441e-04, forward_time=0.148, loss_ctc=70.183, loss_att=51.391, acc=0.710, loss=57.029, backward_time=1.031, grad_norm=147.682, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.844e-05, train_time=2.737 +[gpub007:0/64] 2023-07-11 18:11:52,397 (trainer:732) INFO: 38epoch:train:6001-6100batch: iter_time=1.310e-04, forward_time=0.148, loss_ctc=70.819, loss_att=52.613, acc=0.711, loss=58.075, backward_time=1.031, grad_norm=113.462, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.844e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 18:14:08,779 (trainer:732) INFO: 
38epoch:train:6101-6200batch: iter_time=1.367e-04, forward_time=0.150, loss_ctc=71.513, loss_att=52.325, acc=0.726, loss=58.081, backward_time=1.032, grad_norm=118.225, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.843e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 18:16:24,587 (trainer:732) INFO: 38epoch:train:6201-6300batch: iter_time=1.327e-04, forward_time=0.147, loss_ctc=73.977, loss_att=53.956, acc=0.712, loss=59.962, backward_time=1.028, grad_norm=121.817, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.842e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 18:18:40,880 (trainer:732) INFO: 38epoch:train:6301-6400batch: iter_time=1.296e-04, forward_time=0.147, loss_ctc=66.128, loss_att=45.003, acc=0.718, loss=51.341, backward_time=1.027, grad_norm=106.130, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.841e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 18:20:56,411 (trainer:732) INFO: 38epoch:train:6401-6500batch: iter_time=1.340e-04, forward_time=0.146, loss_ctc=74.344, loss_att=50.406, acc=0.699, loss=57.588, backward_time=1.027, grad_norm=119.146, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.840e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 18:23:12,519 (trainer:732) INFO: 38epoch:train:6501-6600batch: iter_time=9.948e-05, forward_time=0.146, loss_ctc=70.868, loss_att=60.984, acc=0.701, loss=63.949, backward_time=1.030, grad_norm=116.879, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.840e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 18:24:45,044 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-11 18:25:02,972 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 18:25:06,379 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 18:25:06,379 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 18:25:06,385 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 18:30:09,584 (trainer:732) INFO: 38epoch:train:6601-6700batch: iter_time=1.227, forward_time=0.155, loss_ctc=73.224, loss_att=52.921, acc=0.725, loss=59.012, backward_time=1.047, grad_norm=116.298, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.839e-05, train_time=8.341 +[gpub007:0/64] 2023-07-11 18:32:26,484 (trainer:732) INFO: 38epoch:train:6701-6800batch: iter_time=1.234e-04, forward_time=0.147, loss_ctc=72.226, loss_att=55.596, acc=0.709, loss=60.585, backward_time=1.030, grad_norm=113.274, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.838e-05, train_time=2.738 +[gpub007:0/64] 2023-07-11 18:34:42,989 (trainer:732) INFO: 38epoch:train:6801-6900batch: iter_time=1.331e-04, forward_time=0.148, loss_ctc=66.143, loss_att=48.199, acc=0.713, loss=53.583, backward_time=1.030, grad_norm=119.491, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.837e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 18:36:59,131 (trainer:732) INFO: 38epoch:train:6901-7000batch: iter_time=1.259e-04, forward_time=0.148, loss_ctc=77.564, loss_att=57.864, acc=0.718, loss=63.774, backward_time=1.031, grad_norm=160.365, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.836e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 18:39:17,279 (trainer:732) INFO: 38epoch:train:7001-7100batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=70.556, loss_att=50.590, acc=0.724, loss=56.580, backward_time=1.037, grad_norm=122.650, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.836e-05, train_time=2.763 +[gpub007:0/64] 2023-07-11 18:41:34,504 (trainer:732) INFO: 38epoch:train:7101-7200batch: iter_time=3.144e-04, forward_time=0.149, loss_ctc=68.346, loss_att=48.698, acc=0.710, loss=54.593, backward_time=1.029, grad_norm=118.638, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.835e-05, train_time=2.744 +[gpub007:0/64] 2023-07-11 18:43:57,994 (trainer:732) INFO: 38epoch:train:7201-7300batch: iter_time=1.238e-04, forward_time=0.155, loss_ctc=66.162, loss_att=45.993, acc=0.706, loss=52.044, backward_time=1.036, grad_norm=106.315, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.834e-05, train_time=2.870 +[gpub007:0/64] 2023-07-11 18:46:14,344 (trainer:732) INFO: 38epoch:train:7301-7400batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=73.532, loss_att=58.908, acc=0.703, loss=63.295, backward_time=1.032, grad_norm=115.391, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.833e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 18:48:30,970 (trainer:732) INFO: 38epoch:train:7401-7500batch: iter_time=1.122e-04, forward_time=0.146, loss_ctc=76.415, loss_att=58.132, acc=0.720, loss=63.617, backward_time=1.029, grad_norm=139.268, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.832e-05, train_time=2.732 +[gpub007:0/64] 2023-07-11 18:48:34,386 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub007:0/64] 2023-07-11 18:48:52,397 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 18:48:55,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 18:48:55,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-11 18:48:55,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 18:55:43,722 (trainer:732) INFO: 38epoch:train:7501-7600batch: iter_time=1.273, forward_time=0.146, loss_ctc=68.725, loss_att=54.047, acc=0.697, loss=58.451, backward_time=1.046, grad_norm=128.241, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.832e-05, train_time=8.655 +[gpub007:0/64] 2023-07-11 18:58:00,008 (trainer:732) INFO: 38epoch:train:7601-7700batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=69.149, loss_att=49.610, acc=0.713, loss=55.472, backward_time=1.028, grad_norm=114.492, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.831e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 19:00:15,773 (trainer:732) INFO: 38epoch:train:7701-7800batch: iter_time=1.135e-04, forward_time=0.144, loss_ctc=74.497, loss_att=54.442, acc=0.715, loss=60.458, backward_time=1.026, grad_norm=113.087, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.830e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 19:02:31,653 (trainer:732) INFO: 38epoch:train:7801-7900batch: iter_time=1.162e-04, forward_time=0.146, loss_ctc=71.600, loss_att=54.243, acc=0.707, loss=59.450, backward_time=1.028, grad_norm=109.203, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.829e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 19:04:47,401 (trainer:732) INFO: 38epoch:train:7901-8000batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=69.502, loss_att=48.025, acc=0.717, loss=54.468, backward_time=1.027, grad_norm=148.987, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.829e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 19:07:03,115 (trainer:732) INFO: 38epoch:train:8001-8100batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=68.040, loss_att=47.776, acc=0.705, loss=53.855, backward_time=1.027, grad_norm=108.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.828e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 19:09:18,954 (trainer:732) INFO: 38epoch:train:8101-8200batch: iter_time=1.398e-04, forward_time=0.145, loss_ctc=69.845, loss_att=51.788, acc=0.700, loss=57.205, backward_time=1.028, grad_norm=106.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.827e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 19:11:34,893 (trainer:732) INFO: 38epoch:train:8201-8300batch: iter_time=1.373e-04, forward_time=0.147, loss_ctc=76.879, loss_att=62.620, acc=0.704, loss=66.898, backward_time=1.029, grad_norm=146.480, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.826e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 19:12:20,817 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub007:0/64] 2023-07-11 19:12:39,527 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 19:12:43,264 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 19:12:43,264 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-11 19:12:43,270 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 19:17:54,605 (trainer:732) INFO: 38epoch:train:8301-8400batch: iter_time=1.258, forward_time=0.183, loss_ctc=72.466, loss_att=54.292, acc=0.711, loss=59.744, backward_time=1.060, grad_norm=111.111, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.825e-05, train_time=7.594 +[gpub007:0/64] 2023-07-11 19:20:15,267 (trainer:732) INFO: 38epoch:train:8401-8500batch: iter_time=2.415e-04, forward_time=0.154, loss_ctc=66.970, loss_att=49.058, acc=0.715, loss=54.431, backward_time=1.039, grad_norm=124.947, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.825e-05, train_time=2.813 +[gpub007:0/64] 2023-07-11 19:22:32,016 (trainer:732) INFO: 38epoch:train:8501-8600batch: iter_time=1.261e-04, forward_time=0.146, loss_ctc=70.855, loss_att=52.953, acc=0.714, loss=58.324, backward_time=1.028, grad_norm=119.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.824e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 19:24:48,129 (trainer:732) INFO: 38epoch:train:8601-8700batch: iter_time=1.272e-04, forward_time=0.147, loss_ctc=71.921, loss_att=50.753, acc=0.726, loss=57.103, backward_time=1.029, grad_norm=124.913, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.823e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 19:27:04,015 (trainer:732) INFO: 38epoch:train:8701-8800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=72.131, loss_att=56.377, acc=0.711, loss=61.103, backward_time=1.029, grad_norm=166.004, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.822e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 19:29:19,821 (trainer:732) INFO: 38epoch:train:8801-8900batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=65.279, loss_att=44.458, acc=0.720, loss=50.705, backward_time=1.029, grad_norm=110.307, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.821e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 19:31:35,382 (trainer:732) INFO: 38epoch:train:8901-9000batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=74.734, loss_att=51.599, acc=0.700, loss=58.539, backward_time=1.028, grad_norm=109.171, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.821e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 19:33:51,354 (trainer:732) INFO: 38epoch:train:9001-9100batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=72.050, 
loss_att=59.282, acc=0.708, loss=63.112, backward_time=1.030, grad_norm=117.917, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.820e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 19:35:41,200 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-11 19:35:59,242 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 19:36:02,770 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 19:36:02,770 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-11 19:36:02,776 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 19:40:21,958 (trainer:732) INFO: 38epoch:train:9101-9200batch: iter_time=1.997, forward_time=0.145, loss_ctc=77.765, loss_att=61.390, acc=0.716, loss=66.302, backward_time=1.047, grad_norm=120.032, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.819e-05, train_time=7.812 +[gpub007:0/64] 2023-07-11 19:42:38,550 (trainer:732) INFO: 38epoch:train:9201-9300batch: iter_time=1.323e-04, forward_time=0.146, loss_ctc=67.866, loss_att=49.324, acc=0.710, loss=54.886, backward_time=1.032, grad_norm=107.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.818e-05, train_time=2.732 +[gpub007:0/64] 2023-07-11 19:44:54,794 (trainer:732) INFO: 38epoch:train:9301-9400batch: iter_time=1.387e-04, forward_time=0.147, loss_ctc=69.892, loss_att=52.442, acc=0.716, loss=57.677, backward_time=1.029, grad_norm=143.222, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.817e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 19:47:11,992 (trainer:732) INFO: 38epoch:train:9401-9500batch: iter_time=1.198e-04, forward_time=0.148, loss_ctc=70.450, loss_att=51.486, acc=0.719, loss=57.175, backward_time=1.029, grad_norm=138.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.817e-05, train_time=2.744 +[gpub007:0/64] 2023-07-11 19:49:28,386 (trainer:732) INFO: 38epoch:train:9501-9600batch: iter_time=1.269e-04, forward_time=0.147, loss_ctc=72.542, loss_att=52.742, acc=0.717, loss=58.682, backward_time=1.030, grad_norm=121.855, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.816e-05, train_time=2.728 +[gpub007:0/64] 2023-07-11 19:51:44,010 (trainer:732) INFO: 38epoch:train:9601-9700batch: iter_time=1.413e-04, forward_time=0.146, loss_ctc=64.937, loss_att=44.673, acc=0.718, loss=50.752, backward_time=1.027, grad_norm=101.027, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.815e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 19:53:59,791 (trainer:732) INFO: 38epoch:train:9701-9800batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.702, loss_att=49.664, acc=0.699, loss=55.976, backward_time=1.028, grad_norm=123.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.814e-05, train_time=2.715 +[gpub007:0/64] 
2023-07-11 19:56:15,795 (trainer:732) INFO: 38epoch:train:9801-9900batch: iter_time=1.446e-04, forward_time=0.147, loss_ctc=71.465, loss_att=59.772, acc=0.706, loss=63.280, backward_time=1.029, grad_norm=106.173, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.814e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 19:58:41,336 (trainer:732) INFO: 38epoch:train:9901-10000batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=74.490, loss_att=53.633, acc=0.724, loss=59.890, backward_time=1.040, grad_norm=135.312, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.813e-05, train_time=2.911 +[gpub007:0/64] 2023-07-11 20:11:56,519 (trainer:338) INFO: 38epoch results: [train] iter_time=0.288, forward_time=0.149, loss_ctc=71.940, loss_att=53.255, acc=0.707, loss=58.860, backward_time=1.032, grad_norm=121.692, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.852e-05, train_time=3.523, time=4 hours, 53 minutes and 53.03 seconds, total_count=350000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=43.950, cer_ctc=0.261, loss_att=37.228, acc=0.685, cer=0.385, wer=0.996, loss=39.244, time=7 minutes and 0.7 seconds, total_count=35926, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 56.54 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-11 20:12:12,342 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub007:0/64] 2023-07-11 20:12:12,352 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/24epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/33epoch.pth +[gpub007:0/64] 2023-07-11 20:12:12,352 (trainer:272) INFO: 39/50epoch started. Estimated time to finish: 2 days, 10 hours and 25 minutes +[gpub007:0/64] 2023-07-11 20:12:12,356 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
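The per-batch records above report loss_ctc, loss_att, and a combined loss; the logged values are consistent with the usual hybrid CTC/attention interpolation with a CTC weight of 0.3 (for example, 0.3 * 72.466 + 0.7 * 54.292 = 59.744, matching the 38epoch:train:8301-8400batch record). A minimal sketch of that combination, assuming ctc_weight=0.3 as inferred from the logged numbers rather than read from the actual config file:

def combine_losses(loss_ctc: float, loss_att: float, ctc_weight: float = 0.3) -> float:
    # Interpolate the CTC and attention losses into the single "loss" column.
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# Reproduces the logged value up to rounding: combine_losses(72.466, 54.292) == 59.7442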
+[gpub007:0/64] 2023-07-11 20:12:30,179 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 20:12:33,884 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 20:12:33,884 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 20:12:33,890 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 20:17:04,086 (trainer:732) INFO: 39epoch:train:1-100batch: iter_time=1.472, forward_time=0.146, loss_ctc=82.445, loss_att=57.403, acc=0.707, loss=64.915, backward_time=1.051, grad_norm=146.381, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.812e-05, train_time=5.834 +[gpub007:0/64] 2023-07-11 20:19:22,186 (trainer:732) INFO: 39epoch:train:101-200batch: iter_time=6.767e-04, forward_time=0.163, loss_ctc=70.922, loss_att=53.759, acc=0.705, loss=58.908, backward_time=1.028, grad_norm=143.470, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.811e-05, train_time=2.762 +[gpub007:0/64] 2023-07-11 20:21:39,123 (trainer:732) INFO: 39epoch:train:201-300batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=72.707, loss_att=63.764, acc=0.689, loss=66.447, backward_time=1.029, grad_norm=122.595, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.810e-05, train_time=2.739 +[gpub007:0/64] 2023-07-11 20:24:04,949 (trainer:732) INFO: 39epoch:train:301-400batch: iter_time=1.081e-04, forward_time=0.145, loss_ctc=75.385, loss_att=61.053, acc=0.699, loss=65.353, backward_time=1.043, grad_norm=117.329, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.810e-05, train_time=2.916 +[gpub007:0/64] 2023-07-11 20:26:28,493 (trainer:732) INFO: 39epoch:train:401-500batch: iter_time=1.191e-04, forward_time=0.145, loss_ctc=66.666, loss_att=48.058, acc=0.718, loss=53.640, backward_time=1.036, grad_norm=100.428, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.809e-05, train_time=2.871 +[gpub007:0/64] 2023-07-11 20:28:50,785 (trainer:732) INFO: 39epoch:train:501-600batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=73.731, loss_att=55.130, acc=0.703, loss=60.710, backward_time=1.033, grad_norm=125.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.808e-05, train_time=2.846 +[gpub007:0/64] 2023-07-11 20:31:06,991 (trainer:732) INFO: 39epoch:train:601-700batch: iter_time=1.222e-04, forward_time=0.144, loss_ctc=75.103, loss_att=54.202, acc=0.718, loss=60.472, backward_time=1.028, grad_norm=116.167, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.807e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 20:33:32,626 (trainer:732) INFO: 39epoch:train:701-800batch: iter_time=1.113e-04, forward_time=0.179, loss_ctc=83.461, loss_att=67.013, acc=0.708, loss=71.947, backward_time=1.038, grad_norm=124.775, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.806e-05, 
train_time=2.912 +[gpub007:0/64] 2023-07-11 20:34:25,347 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-11 20:34:42,967 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 20:34:46,607 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 20:34:46,607 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-11 20:34:46,613 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 20:40:11,035 (trainer:732) INFO: 39epoch:train:801-900batch: iter_time=1.515, forward_time=0.194, loss_ctc=69.526, loss_att=50.779, acc=0.716, loss=56.403, backward_time=1.043, grad_norm=109.498, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.806e-05, train_time=7.968 +[gpub007:0/64] 2023-07-11 20:42:27,546 (trainer:732) INFO: 39epoch:train:901-1000batch: iter_time=1.281e-04, forward_time=0.146, loss_ctc=72.782, loss_att=50.449, acc=0.717, loss=57.149, backward_time=1.029, grad_norm=119.216, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.805e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 20:44:43,782 (trainer:732) INFO: 39epoch:train:1001-1100batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=71.584, loss_att=61.262, acc=0.694, loss=64.358, backward_time=1.031, grad_norm=117.670, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.804e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 20:47:00,214 (trainer:732) INFO: 39epoch:train:1101-1200batch: iter_time=1.113e-04, forward_time=0.146, loss_ctc=78.972, loss_att=65.692, acc=0.686, loss=69.676, backward_time=1.031, grad_norm=138.979, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.803e-05, train_time=2.728 +[gpub007:0/64] 2023-07-11 20:49:16,262 (trainer:732) INFO: 39epoch:train:1201-1300batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=65.371, loss_att=48.898, acc=0.724, loss=53.840, backward_time=1.028, grad_norm=96.084, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.803e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 20:51:32,107 (trainer:732) INFO: 39epoch:train:1301-1400batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=77.244, loss_att=55.458, acc=0.720, loss=61.994, backward_time=1.028, grad_norm=119.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.802e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 20:53:47,887 (trainer:732) INFO: 39epoch:train:1401-1500batch: iter_time=1.205e-04, forward_time=0.145, loss_ctc=68.215, loss_att=49.460, acc=0.716, loss=55.086, backward_time=1.028, grad_norm=110.506, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.801e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 20:56:03,844 (trainer:732) INFO: 39epoch:train:1501-1600batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=84.470, loss_att=67.063, acc=0.704, loss=72.285, 
backward_time=1.028, grad_norm=143.565, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.800e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 20:57:38,968 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-11 20:57:57,200 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 20:58:00,858 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 20:58:00,858 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-11 20:58:00,864 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 21:01:27,700 (trainer:732) INFO: 39epoch:train:1601-1700batch: iter_time=1.388, forward_time=0.145, loss_ctc=65.246, loss_att=47.145, acc=0.726, loss=52.576, backward_time=1.037, grad_norm=105.883, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.799e-05, train_time=6.477 +[gpub007:0/64] 2023-07-11 21:03:43,964 (trainer:732) INFO: 39epoch:train:1701-1800batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=80.856, loss_att=60.346, acc=0.697, loss=66.499, backward_time=1.029, grad_norm=132.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.799e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 21:05:59,603 (trainer:732) INFO: 39epoch:train:1801-1900batch: iter_time=1.187e-04, forward_time=0.145, loss_ctc=67.475, loss_att=59.764, acc=0.686, loss=62.077, backward_time=1.027, grad_norm=116.056, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.798e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 21:08:20,006 (trainer:732) INFO: 39epoch:train:1901-2000batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=79.321, loss_att=69.281, acc=0.669, loss=72.293, backward_time=1.029, grad_norm=130.565, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.797e-05, train_time=2.808 +[gpub007:0/64] 2023-07-11 21:09:11,473 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
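The warning above ("The grad norm is nan. Skipping updating the model.") marks an overflow under mixed-precision training: the parameter update is dropped and, per dynamic loss scaling, the loss scale is reduced, which is why the loss_scale column later settles at half its previous value (3.245e+32 down to 1.623e+32). A minimal sketch of this skip-and-rescale pattern using PyTorch's GradScaler; it illustrates the mechanism and is not the exact espnet2 trainer code:

import torch

def training_step(model, batch, optimizer, scaler):
    # scaler is a torch.cuda.amp.GradScaler (default backoff_factor=0.5)
    optimizer.zero_grad()
    with torch.autocast(device_type="cuda"):
        loss = model(**batch)
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)
    # max_norm=100.0 corresponds to the clip=100.000 column in the records above
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
    if not torch.isfinite(grad_norm):
        print("The grad norm is nan. Skipping updating the model.")
    scaler.step(optimizer)  # becomes a no-op when the unscaled grads contain inf/nan
    scaler.update()         # halves the loss scale after an overflow
    return grad_norm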
+[gpub007:0/64] 2023-07-11 21:10:39,704 (trainer:732) INFO: 39epoch:train:2001-2100batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=64.597, loss_att=51.502, acc=0.702, loss=55.430, backward_time=1.031, grad_norm=96.886, clip=100.000, loss_scale=4.437e+32, optim_step_time=0.182, optim0_lr0=5.796e-05, train_time=2.794 +[gpub007:0/64] 2023-07-11 21:12:55,607 (trainer:732) INFO: 39epoch:train:2101-2200batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=74.772, loss_att=53.675, acc=0.714, loss=60.004, backward_time=1.028, grad_norm=117.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.796e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 21:15:11,288 (trainer:732) INFO: 39epoch:train:2201-2300batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=67.289, loss_att=51.779, acc=0.697, loss=56.432, backward_time=1.028, grad_norm=106.108, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.795e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 21:17:26,865 (trainer:732) INFO: 39epoch:train:2301-2400batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=75.190, loss_att=56.083, acc=0.710, loss=61.815, backward_time=1.026, grad_norm=113.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.794e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 21:19:42,549 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-11 21:20:00,548 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 21:20:04,592 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 21:20:04,592 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-11 21:20:04,598 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 21:24:58,902 (trainer:732) INFO: 39epoch:train:2401-2500batch: iter_time=1.280, forward_time=0.145, loss_ctc=76.397, loss_att=58.799, acc=0.714, loss=64.078, backward_time=1.034, grad_norm=115.315, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.793e-05, train_time=9.041 +[gpub007:0/64] 2023-07-11 21:27:17,544 (trainer:732) INFO: 39epoch:train:2501-2600batch: iter_time=1.457e-04, forward_time=0.148, loss_ctc=81.056, loss_att=59.578, acc=0.701, loss=66.021, backward_time=1.043, grad_norm=142.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.792e-05, train_time=2.773 +[gpub007:0/64] 2023-07-11 21:29:33,439 (trainer:732) INFO: 39epoch:train:2601-2700batch: iter_time=1.604e-04, forward_time=0.148, loss_ctc=65.619, loss_att=55.447, acc=0.690, loss=58.498, backward_time=1.029, grad_norm=102.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.792e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 21:31:49,666 (trainer:732) INFO: 39epoch:train:2701-2800batch: iter_time=1.603e-04, forward_time=0.148, loss_ctc=82.212, loss_att=71.106, acc=0.669, loss=74.438, 
backward_time=1.031, grad_norm=129.332, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.791e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 21:34:05,242 (trainer:732) INFO: 39epoch:train:2801-2900batch: iter_time=1.524e-04, forward_time=0.147, loss_ctc=64.134, loss_att=51.913, acc=0.700, loss=55.579, backward_time=1.027, grad_norm=123.945, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.790e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 21:36:23,083 (trainer:732) INFO: 39epoch:train:2901-3000batch: iter_time=1.439e-04, forward_time=0.147, loss_ctc=79.298, loss_att=55.578, acc=0.712, loss=62.694, backward_time=1.030, grad_norm=139.929, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.789e-05, train_time=2.757 +[gpub007:0/64] 2023-07-11 21:38:38,798 (trainer:732) INFO: 39epoch:train:3001-3100batch: iter_time=1.589e-04, forward_time=0.147, loss_ctc=63.415, loss_att=48.217, acc=0.695, loss=52.776, backward_time=1.027, grad_norm=103.041, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.789e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 21:40:56,121 (trainer:732) INFO: 39epoch:train:3101-3200batch: iter_time=1.386e-04, forward_time=0.147, loss_ctc=78.088, loss_att=58.351, acc=0.711, loss=64.272, backward_time=1.029, grad_norm=105.578, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.788e-05, train_time=2.746 +[gpub007:0/64] 2023-07-11 21:41:51,553 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub007:0/64] 2023-07-11 21:43:12,954 (trainer:732) INFO: 39epoch:train:3201-3300batch: iter_time=1.415e-04, forward_time=0.148, loss_ctc=73.884, loss_att=56.857, acc=0.712, loss=61.965, backward_time=1.031, grad_norm=107.966, clip=100.000, loss_scale=2.252e+32, optim_step_time=0.182, optim0_lr0=5.787e-05, train_time=2.736 +[gpub007:0/64] 2023-07-11 21:43:59,089 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
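Each "Building Nth iter-factory..." record reflects --multiple_iterator true: the training data was pre-sharded into twelve pieces (splits12/.../split.0 through split.11), and the trainer constructs one iterator per shard in a shuffled shard order (split.7, split.3, split.4, split.10, split.11, ... within this epoch), so only one shard's batches need to be materialized at a time. A rough sketch of the idea with hypothetical names, not the espnet2 implementation:

import random
from typing import Callable, Iterable, Iterator

def multiple_iter_factory(build_shard_iter: Callable[[int], Iterable],
                          num_splits: int = 12, seed: int = 0) -> Iterator:
    order = list(range(num_splits))
    random.Random(seed).shuffle(order)  # shard order varies from epoch to epoch
    for i, split in enumerate(order):
        print(f"Building {i}th iter-factory...")
        yield from build_shard_iter(split)  # batches drawn from splits12/*/split.{split}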
+[gpub007:0/64] 2023-07-11 21:44:17,234 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 21:44:20,710 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 21:44:20,710 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-11 21:44:20,717 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 21:50:15,040 (trainer:732) INFO: 39epoch:train:3301-3400batch: iter_time=1.282, forward_time=0.160, loss_ctc=80.950, loss_att=59.708, acc=0.700, loss=66.081, backward_time=1.043, grad_norm=114.918, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.786e-05, train_time=8.441 +[gpub007:0/64] 2023-07-11 21:52:36,391 (trainer:732) INFO: 39epoch:train:3401-3500batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=68.881, loss_att=53.019, acc=0.709, loss=57.778, backward_time=1.036, grad_norm=115.394, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.785e-05, train_time=2.827 +[gpub007:0/64] 2023-07-11 21:54:53,232 (trainer:732) INFO: 39epoch:train:3501-3600batch: iter_time=1.270e-04, forward_time=0.145, loss_ctc=70.559, loss_att=61.970, acc=0.696, loss=64.546, backward_time=1.031, grad_norm=135.658, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.785e-05, train_time=2.737 +[gpub007:0/64] 2023-07-11 21:57:09,569 (trainer:732) INFO: 39epoch:train:3601-3700batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=75.565, loss_att=60.106, acc=0.703, loss=64.744, backward_time=1.031, grad_norm=135.792, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.784e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 21:59:25,333 (trainer:732) INFO: 39epoch:train:3701-3800batch: iter_time=1.349e-04, forward_time=0.145, loss_ctc=63.881, loss_att=45.538, acc=0.730, loss=51.041, backward_time=1.027, grad_norm=119.573, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.783e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 22:01:42,075 (trainer:732) INFO: 39epoch:train:3801-3900batch: iter_time=1.319e-04, forward_time=0.146, loss_ctc=71.441, loss_att=54.331, acc=0.711, loss=59.464, backward_time=1.028, grad_norm=140.062, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.782e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 22:03:57,692 (trainer:732) INFO: 39epoch:train:3901-4000batch: iter_time=1.283e-04, forward_time=0.145, loss_ctc=73.747, loss_att=52.312, acc=0.723, loss=58.742, backward_time=1.027, grad_norm=108.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.782e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 22:06:16,663 (trainer:732) INFO: 39epoch:train:4001-4100batch: iter_time=1.254e-04, forward_time=0.146, loss_ctc=82.516, loss_att=66.612, acc=0.709, loss=71.383, backward_time=1.033, grad_norm=125.623, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.781e-05, train_time=2.779 +[gpub007:0/64] 2023-07-11 22:07:54,576 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-11 22:08:12,612 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 22:08:16,068 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 22:08:16,068 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-11 22:08:16,074 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 22:12:33,176 (trainer:732) INFO: 39epoch:train:4101-4200batch: iter_time=1.323, forward_time=0.145, loss_ctc=64.505, loss_att=45.467, acc=0.726, loss=51.178, backward_time=1.043, grad_norm=98.405, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.780e-05, train_time=7.530 +[gpub007:0/64] 2023-07-11 22:14:50,309 (trainer:732) INFO: 39epoch:train:4201-4300batch: iter_time=1.246e-04, forward_time=0.145, loss_ctc=80.061, loss_att=59.836, acc=0.698, loss=65.903, backward_time=1.031, grad_norm=112.396, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.779e-05, train_time=2.742 +[gpub007:0/64] 2023-07-11 22:17:06,891 (trainer:732) INFO: 39epoch:train:4301-4400batch: iter_time=1.234e-04, forward_time=0.145, loss_ctc=66.777, loss_att=55.327, acc=0.692, loss=58.762, backward_time=1.033, grad_norm=124.735, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.779e-05, train_time=2.731 +[gpub007:0/64] 2023-07-11 22:19:26,276 (trainer:732) INFO: 39epoch:train:4401-4500batch: iter_time=1.416e-04, forward_time=0.146, loss_ctc=72.687, loss_att=67.506, acc=0.678, loss=69.061, backward_time=1.031, grad_norm=111.537, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.778e-05, train_time=2.787 +[gpub007:0/64] 2023-07-11 22:21:42,125 (trainer:732) INFO: 39epoch:train:4501-4600batch: iter_time=1.443e-04, forward_time=0.145, loss_ctc=71.028, loss_att=53.255, acc=0.705, loss=58.587, backward_time=1.027, grad_norm=118.819, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.777e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 22:23:57,568 (trainer:732) INFO: 39epoch:train:4601-4700batch: iter_time=1.358e-04, forward_time=0.144, loss_ctc=74.469, loss_att=52.505, acc=0.718, loss=59.094, backward_time=1.025, grad_norm=106.386, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.776e-05, train_time=2.709 +[gpub007:0/64] 2023-07-11 22:26:17,925 (trainer:732) INFO: 39epoch:train:4701-4800batch: iter_time=1.489e-04, forward_time=0.145, loss_ctc=67.380, loss_att=50.509, acc=0.698, loss=55.570, backward_time=1.032, grad_norm=118.811, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.775e-05, train_time=2.807 +[gpub007:0/64] 2023-07-11 22:28:33,973 (trainer:732) INFO: 39epoch:train:4801-4900batch: iter_time=1.461e-04, forward_time=0.145, loss_ctc=76.569, loss_att=58.151, 
acc=0.712, loss=63.676, backward_time=1.028, grad_norm=109.727, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.775e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 22:30:49,870 (trainer:732) INFO: 39epoch:train:4901-5000batch: iter_time=1.377e-04, forward_time=0.145, loss_ctc=75.369, loss_att=58.528, acc=0.713, loss=63.580, backward_time=1.029, grad_norm=111.846, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.774e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 22:30:51,224 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-11 22:31:09,707 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 22:31:13,167 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 22:31:13,167 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-11 22:31:13,173 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 22:38:08,191 (trainer:732) INFO: 39epoch:train:5001-5100batch: iter_time=1.344, forward_time=0.145, loss_ctc=82.216, loss_att=59.417, acc=0.703, loss=66.256, backward_time=1.042, grad_norm=162.555, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.773e-05, train_time=8.766 +[gpub007:0/64] 2023-07-11 22:40:24,746 (trainer:732) INFO: 39epoch:train:5101-5200batch: iter_time=1.092e-04, forward_time=0.145, loss_ctc=65.477, loss_att=56.303, acc=0.692, loss=59.055, backward_time=1.027, grad_norm=106.277, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.772e-05, train_time=2.731 +[gpub007:0/64] 2023-07-11 22:42:41,079 (trainer:732) INFO: 39epoch:train:5201-5300batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=77.772, loss_att=69.323, acc=0.670, loss=71.858, backward_time=1.031, grad_norm=135.923, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.772e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 22:44:56,788 (trainer:732) INFO: 39epoch:train:5301-5400batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=62.953, loss_att=48.841, acc=0.710, loss=53.075, backward_time=1.027, grad_norm=103.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.771e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 22:47:12,771 (trainer:732) INFO: 39epoch:train:5401-5500batch: iter_time=1.177e-04, forward_time=0.145, loss_ctc=77.382, loss_att=55.306, acc=0.718, loss=61.929, backward_time=1.031, grad_norm=114.521, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.770e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 22:49:42,770 (trainer:732) INFO: 39epoch:train:5501-5600batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=64.633, loss_att=48.744, acc=0.696, loss=53.511, backward_time=1.054, grad_norm=113.882, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.769e-05, train_time=3.000 +[gpub007:0/64] 2023-07-11 22:51:58,793 
(trainer:732) INFO: 39epoch:train:5601-5700batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=77.302, loss_att=57.512, acc=0.712, loss=63.449, backward_time=1.029, grad_norm=128.634, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.769e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 22:54:16,192 (trainer:732) INFO: 39epoch:train:5701-5800batch: iter_time=1.324e-04, forward_time=0.147, loss_ctc=73.793, loss_att=55.864, acc=0.721, loss=61.243, backward_time=1.030, grad_norm=114.852, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.768e-05, train_time=2.748 +[gpub007:0/64] 2023-07-11 22:55:04,165 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-11 22:55:22,024 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 22:55:25,496 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 22:55:25,496 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-11 22:55:25,503 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 23:00:17,174 (trainer:732) INFO: 39epoch:train:5801-5900batch: iter_time=1.300, forward_time=0.148, loss_ctc=79.487, loss_att=58.835, acc=0.707, loss=65.030, backward_time=1.046, grad_norm=153.347, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.767e-05, train_time=7.219 +[gpub007:0/64] 2023-07-11 23:02:33,705 (trainer:732) INFO: 39epoch:train:5901-6000batch: iter_time=1.184e-04, forward_time=0.147, loss_ctc=68.365, loss_att=51.824, acc=0.712, loss=56.787, backward_time=1.030, grad_norm=106.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.766e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 23:04:49,940 (trainer:732) INFO: 39epoch:train:6001-6100batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=70.279, loss_att=63.284, acc=0.692, loss=65.382, backward_time=1.028, grad_norm=113.976, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.765e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 23:07:06,067 (trainer:732) INFO: 39epoch:train:6101-6200batch: iter_time=1.352e-04, forward_time=0.147, loss_ctc=74.481, loss_att=58.346, acc=0.709, loss=63.186, backward_time=1.030, grad_norm=110.168, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.765e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 23:09:21,857 (trainer:732) INFO: 39epoch:train:6201-6300batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=64.434, loss_att=46.023, acc=0.729, loss=51.546, backward_time=1.027, grad_norm=104.476, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.764e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 23:11:37,542 (trainer:732) INFO: 39epoch:train:6301-6400batch: iter_time=9.883e-05, forward_time=0.146, loss_ctc=73.731, loss_att=55.854, acc=0.705, loss=61.217, backward_time=1.029, grad_norm=128.634, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.763e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 23:13:53,110 (trainer:732) INFO: 39epoch:train:6401-6500batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=73.122, loss_att=52.799, acc=0.722, loss=58.896, backward_time=1.027, grad_norm=116.155, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.762e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 23:16:09,488 (trainer:732) INFO: 39epoch:train:6501-6600batch: iter_time=1.200e-04, forward_time=0.148, loss_ctc=80.677, loss_att=64.731, acc=0.717, loss=69.514, backward_time=1.032, grad_norm=124.275, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.762e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 23:17:43,563 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-11 23:18:01,565 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 23:18:04,990 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 23:18:04,991 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-11 23:18:04,997 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 23:23:07,054 (trainer:732) INFO: 39epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.148, loss_ctc=62.708, loss_att=44.454, acc=0.731, loss=49.930, backward_time=1.044, grad_norm=108.320, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.761e-05, train_time=8.351 +[gpub007:0/64] 2023-07-11 23:25:23,988 (trainer:732) INFO: 39epoch:train:6701-6800batch: iter_time=1.248e-04, forward_time=0.147, loss_ctc=76.781, loss_att=56.793, acc=0.708, loss=62.789, backward_time=1.033, grad_norm=122.413, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.760e-05, train_time=2.738 +[gpub007:0/64] 2023-07-11 23:27:39,830 (trainer:732) INFO: 39epoch:train:6801-6900batch: iter_time=1.359e-04, forward_time=0.145, loss_ctc=67.450, loss_att=56.179, acc=0.690, loss=59.560, backward_time=1.029, grad_norm=122.769, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.759e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 23:29:55,762 (trainer:732) INFO: 39epoch:train:6901-7000batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=74.007, loss_att=67.955, acc=0.676, loss=69.771, backward_time=1.028, grad_norm=110.064, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.759e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 23:32:11,662 (trainer:732) INFO: 39epoch:train:7001-7100batch: iter_time=1.410e-04, forward_time=0.146, loss_ctc=69.492, loss_att=52.768, acc=0.706, loss=57.785, backward_time=1.028, grad_norm=124.454, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.758e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 23:34:27,575 (trainer:732) INFO: 39epoch:train:7101-7200batch: iter_time=1.266e-04, 
forward_time=0.145, loss_ctc=74.585, loss_att=52.634, acc=0.720, loss=59.220, backward_time=1.025, grad_norm=118.461, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.757e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 23:36:48,766 (trainer:732) INFO: 39epoch:train:7201-7300batch: iter_time=1.233e-04, forward_time=0.145, loss_ctc=66.711, loss_att=48.903, acc=0.700, loss=54.246, backward_time=1.039, grad_norm=116.360, clip=100.000, loss_scale=2.596e+32, optim_step_time=0.181, optim0_lr0=5.756e-05, train_time=2.824 +[gpub007:0/64] 2023-07-11 23:39:05,143 (trainer:732) INFO: 39epoch:train:7301-7400batch: iter_time=1.284e-04, forward_time=0.145, loss_ctc=75.458, loss_att=57.210, acc=0.713, loss=62.684, backward_time=1.027, grad_norm=114.220, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.756e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 23:41:21,094 (trainer:732) INFO: 39epoch:train:7401-7500batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=74.979, loss_att=59.217, acc=0.713, loss=63.946, backward_time=1.027, grad_norm=115.240, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.755e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 23:41:23,694 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub007:0/64] 2023-07-11 23:41:41,916 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 23:41:45,376 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 23:41:45,377 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub007:0/64] 2023-07-11 23:41:45,383 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 23:46:42,941 (trainer:732) INFO: 39epoch:train:7501-7600batch: iter_time=1.296, forward_time=0.145, loss_ctc=77.503, loss_att=55.837, acc=0.711, loss=62.337, backward_time=1.051, grad_norm=145.896, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.754e-05, train_time=6.437 +[gpub007:0/64] 2023-07-11 23:48:59,674 (trainer:732) INFO: 39epoch:train:7601-7700batch: iter_time=1.041e-04, forward_time=0.144, loss_ctc=67.198, loss_att=51.955, acc=0.705, loss=56.528, backward_time=1.028, grad_norm=113.429, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.753e-05, train_time=2.734 +[gpub007:0/64] 2023-07-11 23:51:15,617 (trainer:732) INFO: 39epoch:train:7701-7800batch: iter_time=1.090e-04, forward_time=0.144, loss_ctc=69.917, loss_att=61.177, acc=0.693, loss=63.799, backward_time=1.028, grad_norm=115.644, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.752e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 23:53:31,519 (trainer:732) INFO: 39epoch:train:7801-7900batch: iter_time=1.015e-04, forward_time=0.145, loss_ctc=73.589, loss_att=58.090, acc=0.692, loss=62.739, backward_time=1.028, grad_norm=133.587, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.752e-05, 
train_time=2.718 +[gpub007:0/64] 2023-07-11 23:55:46,933 (trainer:732) INFO: 39epoch:train:7901-8000batch: iter_time=1.143e-04, forward_time=0.143, loss_ctc=66.739, loss_att=48.185, acc=0.715, loss=53.751, backward_time=1.025, grad_norm=104.374, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.751e-05, train_time=2.708 +[gpub007:0/64] 2023-07-11 23:58:02,634 (trainer:732) INFO: 39epoch:train:8001-8100batch: iter_time=1.041e-04, forward_time=0.144, loss_ctc=72.965, loss_att=54.303, acc=0.699, loss=59.902, backward_time=1.027, grad_norm=127.034, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.750e-05, train_time=2.714 +[gpub007:0/64] 2023-07-12 00:00:18,410 (trainer:732) INFO: 39epoch:train:8101-8200batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=74.311, loss_att=54.365, acc=0.710, loss=60.348, backward_time=1.028, grad_norm=116.272, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.749e-05, train_time=2.715 +[gpub007:0/64] 2023-07-12 00:02:34,457 (trainer:732) INFO: 39epoch:train:8201-8300batch: iter_time=1.422e-04, forward_time=0.147, loss_ctc=83.063, loss_att=65.404, acc=0.711, loss=70.701, backward_time=1.028, grad_norm=116.915, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.749e-05, train_time=2.721 +[gpub007:0/64] 2023-07-12 00:03:21,916 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub007:0/64] 2023-07-12 00:03:39,866 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-12 00:03:43,637 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-12 00:03:43,637 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-12 00:03:43,643 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-12 00:09:32,672 (trainer:732) INFO: 39epoch:train:8301-8400batch: iter_time=1.294, forward_time=0.173, loss_ctc=67.213, loss_att=48.682, acc=0.723, loss=54.241, backward_time=1.041, grad_norm=99.075, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.748e-05, train_time=8.364 +[gpub007:0/64] 2023-07-12 00:11:52,214 (trainer:732) INFO: 39epoch:train:8401-8500batch: iter_time=1.272e-04, forward_time=0.145, loss_ctc=70.258, loss_att=50.030, acc=0.724, loss=56.098, backward_time=1.027, grad_norm=127.922, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.747e-05, train_time=2.791 +[gpub007:0/64] 2023-07-12 00:14:09,395 (trainer:732) INFO: 39epoch:train:8501-8600batch: iter_time=1.290e-04, forward_time=0.146, loss_ctc=70.442, loss_att=61.909, acc=0.697, loss=64.469, backward_time=1.029, grad_norm=112.268, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.746e-05, train_time=2.743 +[gpub007:0/64] 2023-07-12 00:16:25,778 (trainer:732) INFO: 39epoch:train:8601-8700batch: iter_time=1.262e-04, forward_time=0.148, loss_ctc=74.135, loss_att=62.987, acc=0.695, 
loss=66.332, backward_time=1.032, grad_norm=125.859, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.746e-05, train_time=2.727 +[gpub007:0/64] 2023-07-12 00:18:41,463 (trainer:732) INFO: 39epoch:train:8701-8800batch: iter_time=1.386e-04, forward_time=0.146, loss_ctc=63.375, loss_att=49.055, acc=0.724, loss=53.351, backward_time=1.027, grad_norm=99.015, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.745e-05, train_time=2.713 +[gpub007:0/64] 2023-07-12 00:20:57,425 (trainer:732) INFO: 39epoch:train:8801-8900batch: iter_time=1.280e-04, forward_time=0.146, loss_ctc=78.425, loss_att=54.594, acc=0.729, loss=61.744, backward_time=1.029, grad_norm=136.659, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.744e-05, train_time=2.719 +[gpub007:0/64] 2023-07-12 00:23:13,295 (trainer:732) INFO: 39epoch:train:8901-9000batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=67.255, loss_att=49.057, acc=0.716, loss=54.517, backward_time=1.027, grad_norm=100.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.743e-05, train_time=2.717 +[gpub007:0/64] 2023-07-12 00:25:29,675 (trainer:732) INFO: 39epoch:train:9001-9100batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=83.715, loss_att=66.621, acc=0.708, loss=71.749, backward_time=1.031, grad_norm=134.965, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.743e-05, train_time=2.727 +[gpub007:0/64] 2023-07-12 00:27:06,738 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-12 00:27:24,585 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-12 00:27:28,341 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-12 00:27:28,341 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-12 00:27:28,347 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-12 00:32:56,782 (trainer:732) INFO: 39epoch:train:9101-9200batch: iter_time=1.495, forward_time=0.146, loss_ctc=64.232, loss_att=44.875, acc=0.735, loss=50.682, backward_time=1.035, grad_norm=100.758, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.742e-05, train_time=8.942 +[gpub007:0/64] 2023-07-12 00:35:13,889 (trainer:732) INFO: 39epoch:train:9201-9300batch: iter_time=1.259e-04, forward_time=0.147, loss_ctc=78.149, loss_att=56.447, acc=0.718, loss=62.958, backward_time=1.033, grad_norm=115.095, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.741e-05, train_time=2.742 +[gpub007:0/64] 2023-07-12 00:37:31,478 (trainer:732) INFO: 39epoch:train:9301-9400batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=65.513, loss_att=54.281, acc=0.706, loss=57.651, backward_time=1.029, grad_norm=99.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.740e-05, train_time=2.752 +[gpub007:0/64] 2023-07-12 00:39:48,848 
(trainer:732) INFO: 39epoch:train:9401-9500batch: iter_time=1.150e-04, forward_time=0.147, loss_ctc=72.450, loss_att=67.505, acc=0.688, loss=68.988, backward_time=1.031, grad_norm=117.756, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.740e-05, train_time=2.747 +[gpub007:0/64] 2023-07-12 00:42:05,020 (trainer:732) INFO: 39epoch:train:9501-9600batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=69.448, loss_att=51.998, acc=0.725, loss=57.233, backward_time=1.027, grad_norm=113.463, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.739e-05, train_time=2.723 +[gpub007:0/64] 2023-07-12 00:44:21,146 (trainer:732) INFO: 39epoch:train:9601-9700batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=74.213, loss_att=52.124, acc=0.730, loss=58.751, backward_time=1.031, grad_norm=112.332, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.738e-05, train_time=2.722 +[gpub007:0/64] 2023-07-12 00:46:36,994 (trainer:732) INFO: 39epoch:train:9701-9800batch: iter_time=1.214e-04, forward_time=0.146, loss_ctc=67.094, loss_att=48.100, acc=0.715, loss=53.798, backward_time=1.028, grad_norm=99.421, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.737e-05, train_time=2.717 +[gpub007:0/64] 2023-07-12 00:48:53,521 (trainer:732) INFO: 39epoch:train:9801-9900batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=77.211, loss_att=58.814, acc=0.718, loss=64.333, backward_time=1.031, grad_norm=115.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.737e-05, train_time=2.730 +[gpub007:0/64] 2023-07-12 00:51:09,754 (trainer:732) INFO: 39epoch:train:9901-10000batch: iter_time=1.240e-04, forward_time=0.147, loss_ctc=75.077, loss_att=57.246, acc=0.722, loss=62.595, backward_time=1.030, grad_norm=115.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.736e-05, train_time=2.724 +[gpub007:0/64] 2023-07-12 01:05:55,834 (trainer:338) INFO: 39epoch results: [train] iter_time=0.163, forward_time=0.147, loss_ctc=72.756, loss_att=56.123, acc=0.707, loss=61.113, backward_time=1.031, grad_norm=118.372, clip=100.000, loss_scale=2.608e+32, optim_step_time=0.182, optim0_lr0=5.774e-05, train_time=3.347, time=4 hours, 39 minutes and 20.7 seconds, total_count=360000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=44.378, cer_ctc=0.263, loss_att=37.628, acc=0.669, cer=0.434, wer=1.000, loss=39.653, time=8 minutes and 26.87 seconds, total_count=36938, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 55.9 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-12 01:06:13,306 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-12 01:06:13,316 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/34epoch.pth +[gpub007:0/64] 2023-07-12 01:06:13,316 (trainer:272) INFO: 40/50epoch started. Estimated time to finish: 2 days, 5 hours and 35 minutes +[gpub007:0/64] 2023-07-12 01:06:13,891 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
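The end-of-epoch records above show the checkpoint bookkeeping: criteria that improved are announced ("The best model has been updated: valid.total_count"), and epoch snapshots that are no longer among the best under any retained criterion are deleted ("The model files were removed: ...34epoch.pth"). A simplified sketch of such keep-best pruning, assuming higher-is-better criteria and a hypothetical prune_checkpoints helper (not the espnet2 code):

import os

def prune_checkpoints(scores: dict, keep_best: int, ckpt_dir: str) -> None:
    # scores: {criterion: {epoch: value}}, higher is better for each criterion.
    keep = set()
    for per_epoch in scores.values():
        keep.update(sorted(per_epoch, key=per_epoch.get, reverse=True)[:keep_best])
    all_epochs = {e for per_epoch in scores.values() for e in per_epoch}
    for epoch in all_epochs - keep:
        path = os.path.join(ckpt_dir, f"{epoch}epoch.pth")
        if os.path.exists(path):
            os.remove(path)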
+[gpub007:0/64] 2023-07-12 01:06:32,113 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-12 01:06:35,520 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-12 01:06:35,520 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-12 01:06:35,527 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-12 01:13:09,997 (trainer:732) INFO: 40epoch:train:1-100batch: iter_time=2.741, forward_time=0.178, loss_ctc=61.185, loss_att=44.059, acc=0.696, loss=49.197, backward_time=1.040, grad_norm=120.778, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.735e-05, train_time=8.323 +[gpub007:0/64] 2023-07-12 01:15:26,329 (trainer:732) INFO: 40epoch:train:101-200batch: iter_time=1.114e-04, forward_time=0.144, loss_ctc=72.157, loss_att=57.240, acc=0.700, loss=61.715, backward_time=1.029, grad_norm=119.348, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.734e-05, train_time=2.727 +[gpub007:0/64] 2023-07-12 01:17:46,280 (trainer:732) INFO: 40epoch:train:201-300batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=81.039, loss_att=57.292, acc=0.717, loss=64.417, backward_time=1.028, grad_norm=165.528, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.734e-05, train_time=2.799 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. 
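train.1.log ends with the SLURM step being aborted mid-epoch. Because every relaunch passes --resume true, each new job (including the train.5.log below) picks up from the latest checkpoint in the experiment directory rather than restarting from scratch, which is why one training run spans several train.N.log files. A minimal sketch of that resume logic, assuming a PyTorch-style checkpoint dict and the file name "checkpoint.pth" rather than the exact espnet2 format:

import os
import torch

def maybe_resume(model, optimizer, output_dir: str) -> int:
    # "checkpoint.pth" is the assumed rolling-checkpoint name here.
    path = os.path.join(output_dir, "checkpoint.pth")
    if not os.path.exists(path):
        return 0  # fresh run: start at epoch 0
    state = torch.load(path, map_location="cpu")
    model.load_state_dict(state["model"])
    optimizer.load_state_dict(state["optimizer"])
    return state["epoch"] + 1  # continue with the next epoch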
diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.5.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.5.log new file mode 100644 index 0000000000000000000000000000000000000000..9bda1668159e277a7ef0b4685c57c08a8c5385a5 --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.5.log @@ -0,0 +1,4446 @@ +# Running on gpub005.delta.ncsa.illinois.edu +# Started at Fri Jul 7 20:05:18 CDT 2023 +# SLURMD_NODENAME=gpub005 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2138608 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2138608 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[005,012-014,018,030,039-041,067,072,084,095-098]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[005,012-014,018,030,039-041,067,072,084,095-098]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2408067 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub005 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz 
+[gpub005:0/64] 2023-07-07 20:08:24,356 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub005:0/64] 2023-07-07 20:08:25,263 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub005:0/64] 2023-07-07 20:08:25,288 (s2t:483) INFO: Vocabulary size: 50002
+[gpub005:0/64] 2023-07-07 20:08:40,904 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub005:0/64] 2023-07-07 20:08:40,913 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      [... (1) through (23): 23 further EncoderLayer blocks, identical in structure to (0), elided ...]
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      [... (1) through (7): DecoderLayer blocks, identical in structure to (0), elided ...]
+      (8): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1):
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub005:0/64] 2023-07-07 20:08:40,913 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub005:0/64] 2023-07-07 20:08:40,913 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub005:0/64] 2023-07-07 20:08:40,939 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub005:0/64] 2023-07-07 20:08:41,640 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub005:0/64] 2023-07-07 20:08:50,082 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:08:50,311 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:08:50,311 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub005:0/64] 2023-07-07 20:08:50,314 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub005:0/64] 2023-07-07 20:08:50,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:08:51,112 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:08:51,112 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub005:0/64] 2023-07-07 20:08:51,112 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpub005:0/64] 2023-07-07 20:09:20,037 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub005:2408151:2408151 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0>
+gpub005:2408151:2408151 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub005:2408151:2408151 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub005:0/64] 2023-07-07 20:09:25,530 (trainer:284) INFO: 23/30epoch started
+[gpub005:0/64] 2023-07-07 20:09:25,594 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-07 20:09:43,306 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:09:46,752 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:09:46,752 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-07 20:09:46,758 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpub005:2408154:2408154 [3] NCCL INFO cudaDriverVersion 12010
+gpub005:2408154:2408154 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0>
+gpub005:2408154:2408154 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub005:2408154:2408227 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0>
+gpub005:2408154:2408227 [3] NCCL INFO Using network IB
+gpub005:2408154:2408227 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub005:2408154:2408227 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub005:2408154:2408227 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub005:2408154:2408227 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub005:2408154:2408227 [3] NCCL INFO Connected all rings
+gpub005:2408154:2408227 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub005:2408154:2408227 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub005:2408154:2408227 [3] NCCL INFO Connected all trees
+gpub005:2408154:2408227 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub005:2408154:2408227 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub005:2408154:2408227 [3] NCCL INFO comm 0x519a6580 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub005:2408151:2408226 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0>
+gpub005:2408151:2408226 [0] NCCL INFO Using network IB
+gpub005:2408151:2408226 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub005:2408151:2408226 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10
11 12 13 14 15 16 17 18 19 +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub005:2408151:2408226 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub005:2408151:2408226 [0] NCCL INFO Connected all rings +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Connected all trees +gpub005:2408151:2408226 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub005:2408151:2408226 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub005:2408151:2408226 [0] NCCL INFO comm 0x8dda0850 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub005:2408152:2408152 [1] NCCL INFO cudaDriverVersion 12010 +gpub005:2408152:2408152 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0> +gpub005:2408152:2408152 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub005:2408152:2408228 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0> +gpub005:2408152:2408228 [1] NCCL INFO Using network IB +gpub005:2408152:2408228 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub005:2408152:2408228 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub005:2408152:2408228 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Connected all rings +gpub005:2408152:2408228 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Connected all trees +gpub005:2408152:2408228 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub005:2408152:2408228 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub005:2408152:2408228 [1] NCCL INFO comm 0x50e7e140 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub067:1574054:1574054 [0] NCCL INFO cudaDriverVersion 12010 +gpub067:1574054:1574054 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1574054:1574054 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1574054:1574131 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1574054:1574131 [0] NCCL INFO Using network IB +gpub067:1574054:1574131 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub067:1574054:1574131 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 
36[7000] -> 37[46000] via P2P/IPC +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub067:1574054:1574131 [0] NCCL INFO Connected all rings +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Connected all trees +gpub067:1574054:1574131 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub067:1574054:1574131 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub067:1574054:1574131 [0] NCCL INFO comm 0x4f342150 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub095:2520061:2520061 [2] NCCL INFO cudaDriverVersion 12010 +gpub095:2520061:2520061 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520061:2520061 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520061:2520137 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520061:2520137 [2] NCCL INFO Using network IB +gpub095:2520061:2520137 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub095:2520061:2520137 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub095:2520061:2520137 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Connected all rings +gpub095:2520061:2520137 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Connected all trees +gpub095:2520061:2520137 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub095:2520061:2520137 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub095:2520061:2520137 [2] NCCL INFO comm 0x91b7930 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub013:1694053:1694053 [0] NCCL INFO cudaDriverVersion 12010 +gpub013:1694053:1694053 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1694053:1694053 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1694053:1694130 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1694053:1694130 [0] NCCL INFO Using network IB +gpub013:1694053:1694130 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub013:1694053:1694130 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub013:1694053:1694130 [0] NCCL INFO Connected all rings +gpub005:2408153:2408153 [2] NCCL INFO cudaDriverVersion 12010 
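
The "888.51 M" figure in the model summary above can be largely recovered from the architecture dump itself. The following back-of-envelope script is an editorial illustration, not part of the log; the sizes are read off the dump (d_model=1024, d_ff=4096, 24 encoder and 24 decoder layers, vocab 50002):

    # Rough parameter count from the printed module tree (illustrative sketch).
    d, ff, vocab = 1024, 4096, 50002
    attn = 4 * (d * d + d)                  # linear_q/k/v/out, each with bias
    ffn = (d * ff + ff) + (ff * d + d)      # w_1 + w_2
    ln = 2 * d                              # LayerNorm weight + bias
    enc_layer = attn + ffn + 2 * ln         # self_attn + feed_forward + norm1/2
    dec_layer = 2 * attn + ffn + 3 * ln     # self_attn + src_attn + feed_forward + norm1/2/3
    embed = vocab * d                       # decoder Embedding(50002, 1024)
    out_layer = d * vocab + vocab           # decoder output_layer
    ctc_lo = d * vocab + vocab              # CTC projection
    total = 24 * enc_layer + 24 * dec_layer + embed + out_layer + ctc_lo
    print(f"{total / 1e6:.2f} M")           # ~859.14 M

The remaining roughly 29 M presumably sits in the Conv2d subsampling frontend (printed earlier in the dump, before this excerpt) and the few norms not counted here.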
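The optimizer dump above reports lr: 2.5e-08 against initial_lr: 0.00025, which is exactly step 1 of the WarmupLR(warmup_steps=10000) schedule. A minimal sketch, assuming ESPnet's usual Noam-style warmup rule (not copied from this repository):

    # Assumed WarmupLR rule:
    #   lr(step) = base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
    def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
        return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

    print(warmup_lr(1))       # 2.5e-08, matching the "lr: 2.5e-08" in the optimizer dump
    print(warmup_lr(10_000))  # 2.5e-04, the peak (initial_lr) at the end of warmup
    print(warmup_lr(40_000))  # 1.25e-04, decaying as step**-0.5 afterwards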
+gpub005:2408153:2408153 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0> +gpub005:2408153:2408153 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub005:2408153:2408229 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0> +gpub005:2408153:2408229 [2] NCCL INFO Using network IB +gpub005:2408153:2408229 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub005:2408153:2408229 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub005:2408153:2408229 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub005:2408153:2408229 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub005:2408153:2408229 [2] NCCL INFO Connected all rings +gpub005:2408153:2408229 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub005:2408153:2408229 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Connected all trees +gpub013:1694053:1694130 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub013:1694053:1694130 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1694053:1694130 [0] NCCL INFO comm 0x8c6ae750 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub005:2408153:2408229 [2] NCCL INFO Connected all trees +gpub005:2408153:2408229 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub005:2408153:2408229 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub005:2408153:2408229 [2] NCCL INFO comm 0x4fab6870 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub013:1694054:1694054 [1] NCCL INFO cudaDriverVersion 12010 +gpub013:1694054:1694054 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1694054:1694054 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1694054:1694131 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1694054:1694131 [1] NCCL INFO Using network IB +gpub013:1694054:1694131 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub013:1694054:1694131 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Connected all rings +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Connected all trees +gpub013:1694054:1694131 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub013:1694054:1694131 [1] NCCL 
INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1694054:1694131 [1] NCCL INFO comm 0x5088d590 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub013:1694055:1694055 [2] NCCL INFO cudaDriverVersion 12010 +gpub013:1694055:1694055 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1694055:1694055 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1694055:1694128 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1694055:1694128 [2] NCCL INFO Using network IB +gpub013:1694055:1694128 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub013:1694055:1694128 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub013:1694055:1694128 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Connected all rings +gpub013:1694055:1694128 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Connected all trees +gpub013:1694055:1694128 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub013:1694055:1694128 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1694055:1694128 [2] NCCL INFO comm 0xf6b9b10 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub067:1574055:1574055 [1] NCCL INFO cudaDriverVersion 12010 +gpub067:1574055:1574055 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1574055:1574055 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1574055:1574134 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1574055:1574134 [1] NCCL INFO Using network IB +gpub067:1574055:1574134 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub067:1574055:1574134 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub067:1574055:1574134 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Connected all rings +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub067:1574055:1574134 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Connected all trees +gpub067:1574055:1574134 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub067:1574055:1574134 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub067:1574055:1574134 [1] NCCL INFO comm 0x509b90f0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub095:2520062:2520062 [3] NCCL INFO cudaDriverVersion 12010 +gpub095:2520062:2520062 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520062:2520062 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520062:2520138 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520062:2520138 [3] NCCL INFO Using network IB +gpub095:2520062:2520138 [3] NCCL INFO Setting affinity for GPU 3 to ffff 
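
The blocks of NCCL INFO lines throughout this log (Bootstrap, Trees, ring/tree channel setup, P2P/IPC within a node, NET/IB across nodes, Init COMPLETE per rank) are NCCL's debug output during process-group initialization for the 64 ranks. A minimal, illustrative way to reproduce this kind of output on any PyTorch + NCCL setup; this is not the exact launch used here:

    # Illustrative sketch: NCCL_DEBUG=INFO makes NCCL print the Bootstrap/Trees/
    # Channel/Init COMPLETE lines seen in this log when the first collective runs.
    import os
    import torch
    import torch.distributed as dist

    os.environ.setdefault("NCCL_DEBUG", "INFO")
    # Assumes RANK/WORLD_SIZE/MASTER_ADDR/MASTER_PORT are set by the launcher.
    dist.init_process_group(backend="nccl", init_method="env://")
    torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())
    x = torch.ones(1, device="cuda")
    dist.all_reduce(x)  # first collective triggers the ring/tree topology lines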
+gpub095:2520062:2520138 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub095:2520062:2520138 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub095:2520062:2520138 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub095:2520062:2520138 [3] NCCL INFO Connected all rings +gpub095:2520062:2520138 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub095:2520062:2520138 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub095:2520062:2520138 [3] NCCL INFO Connected all trees +gpub095:2520062:2520138 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub095:2520062:2520138 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub095:2520062:2520138 [3] NCCL INFO comm 0x8c7104c0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub095:2520059:2520059 [0] NCCL INFO cudaDriverVersion 12010 +gpub095:2520059:2520059 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520059:2520059 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520059:2520136 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520059:2520136 [0] NCCL INFO Using network IB +gpub095:2520059:2520136 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub095:2520059:2520136 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub095:2520059:2520136 [0] NCCL INFO Connected all rings +gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Connected all trees +gpub095:2520059:2520136 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub095:2520059:2520136 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub095:2520059:2520136 [0] NCCL INFO comm 0x15bb1c50 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub095:2520060:2520060 [1] NCCL INFO cudaDriverVersion 12010 +gpub095:2520060:2520060 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520060:2520060 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520060:2520135 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520060:2520135 [1] NCCL INFO Using network IB +gpub095:2520060:2520135 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub095:2520060:2520135 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub095:2520060:2520135 [1] NCCL INFO Channel 01/0 : 
49[46000] -> 50[85000] via P2P/IPC +gpub095:2520060:2520135 [1] NCCL INFO Connected all rings +gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub095:2520060:2520135 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub095:2520060:2520135 [1] NCCL INFO Connected all trees +gpub095:2520060:2520135 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub095:2520060:2520135 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub095:2520060:2520135 [1] NCCL INFO comm 0xb4653490 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub098:1875739:1875739 [1] NCCL INFO cudaDriverVersion 12010 +gpub098:1875739:1875739 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1875739:1875739 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub098:1875739:1875807 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0> +gpub098:1875739:1875807 [1] NCCL INFO Using network IB +gpub098:1875739:1875807 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub098:1875739:1875807 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub098:1875739:1875807 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub098:1875739:1875807 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub098:1875739:1875807 [1] NCCL INFO Connected all rings +gpub098:1875739:1875807 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub098:1875739:1875807 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub098:1875739:1875807 [1] NCCL INFO Connected all trees +gpub098:1875739:1875807 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub098:1875739:1875807 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1875739:1875807 [1] NCCL INFO comm 0x4ffeee90 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub098:1875738:1875738 [0] NCCL INFO cudaDriverVersion 12010 +gpub098:1875738:1875738 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1875738:1875738 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub098:1875738:1875809 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0> +gpub098:1875738:1875809 [0] NCCL INFO Using network IB +gpub098:1875738:1875809 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub098:1875738:1875809 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub098:1875738:1875809 [0] NCCL INFO Connected all rings +gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 60[7000] -> 
56[7000] [send] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Connected all trees +gpub098:1875738:1875809 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub098:1875738:1875809 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1875738:1875809 [0] NCCL INFO comm 0x9e5ca730 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub098:1875740:1875740 [2] NCCL INFO cudaDriverVersion 12010 +gpub098:1875740:1875740 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1875740:1875740 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub098:1875740:1875808 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0> +gpub098:1875740:1875808 [2] NCCL INFO Using network IB +gpub098:1875740:1875808 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub098:1875740:1875808 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub098:1875740:1875808 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub098:1875740:1875808 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub098:1875740:1875808 [2] NCCL INFO Connected all rings +gpub098:1875740:1875808 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub098:1875740:1875808 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub098:1875740:1875808 [2] NCCL INFO Connected all trees +gpub098:1875740:1875808 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub098:1875740:1875808 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1875740:1875808 [2] NCCL INFO comm 0x8c9fbb0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub084:4052709:4052709 [1] NCCL INFO cudaDriverVersion 12010 +gpub084:4052709:4052709 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:4052709:4052709 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:4052709:4052793 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:4052709:4052793 [1] NCCL INFO Using network IB +gpub084:4052709:4052793 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub084:4052709:4052793 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub084:4052709:4052793 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub084:4052709:4052793 [1] NCCL INFO Connected all rings +gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub072:1805521:1805521 [2] NCCL INFO cudaDriverVersion 12010 +gpub072:1805521:1805521 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0> +gpub072:1805521:1805521 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub072:1805521:1805605 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0> +gpub072:1805521:1805605 [2] NCCL INFO Using network IB +gpub072:1805521:1805605 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub072:1805521:1805605 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub072:1805521:1805605 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub072:1805521:1805605 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub072:1805521:1805605 [2] NCCL INFO 
Connected all rings
+gpub072:1805521:1805605 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub072:1805521:1805605 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Connected all trees
+gpub084:4052709:4052793 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052709:4052793 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052709:4052793 [1] NCCL INFO comm 0xd834420 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub072:1805521:1805605 [2] NCCL INFO Connected all trees
+gpub072:1805521:1805605 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805521:1805605 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805521:1805605 [2] NCCL INFO comm 0x8d829e60 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub067:1574056:1574056 [2] NCCL INFO cudaDriverVersion 12010
+gpub067:1574056:1574056 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1574056:1574056 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1574056:1574133 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1574056:1574133 [2] NCCL INFO Using network IB
+gpub067:1574056:1574133 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub067:1574056:1574133 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpub067:1574056:1574133 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Connected all rings
+gpub067:1574056:1574133 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Connected all trees
+gpub067:1574056:1574133 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1574056:1574133 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1574056:1574133 [2] NCCL INFO comm 0xb006d7d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub013:1694056:1694056 [3] NCCL INFO cudaDriverVersion 12010
+gpub013:1694056:1694056 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0>
+gpub013:1694056:1694056 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub013:1694056:1694129 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0>
+gpub013:1694056:1694129 [3] NCCL INFO Using network IB
+gpub013:1694056:1694129 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub013:1694056:1694129 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpub013:1694056:1694129 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub013:1694056:1694129 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub013:1694056:1694129 [3] NCCL INFO Connected all rings
+gpub013:1694056:1694129 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub013:1694056:1694129 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub013:1694056:1694129 [3] NCCL INFO Connected all trees
+gpub013:1694056:1694129 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub013:1694056:1694129 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub013:1694056:1694129 [3] NCCL INFO comm 0x8c00090 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub040:2093690:2093690 [0] NCCL INFO cudaDriverVersion 12010
+gpub040:2093690:2093690 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093690:2093690 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093690:2093772 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093690:2093772 [0] NCCL INFO Using network IB
+gpub040:2093690:2093772 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub040:2093690:2093772 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub040:2093690:2093772 [0] NCCL INFO Connected all rings
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Connected all trees
+gpub040:2093690:2093772 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub040:2093690:2093772 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub040:2093690:2093772 [0] NCCL INFO comm 0xba9dc4d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub014:1495255:1495255 [1] NCCL INFO cudaDriverVersion 12010
+gpub014:1495255:1495255 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495255:1495255 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495255:1495331 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495255:1495331 [1] NCCL INFO Using network IB
+gpub014:1495255:1495331 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub014:1495255:1495331 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpub014:1495255:1495331 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Connected all rings
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpub014:1495255:1495331 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Connected all trees
+gpub014:1495255:1495331 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495255:1495331 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495255:1495331 [1] NCCL INFO comm 0x515d3c50 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub098:1875741:1875741 [3] NCCL INFO cudaDriverVersion 12010
+gpub098:1875741:1875741 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0>
+gpub098:1875741:1875741 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub098:1875741:1875810 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0>
+gpub098:1875741:1875810 [3] NCCL INFO Using network IB
+gpub098:1875741:1875810 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub098:1875741:1875810 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpub098:1875741:1875810 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub098:1875741:1875810 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub098:1875741:1875810 [3] NCCL INFO Connected all rings
+gpub098:1875741:1875810 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub098:1875741:1875810 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub098:1875741:1875810 [3] NCCL INFO Connected all trees
+gpub098:1875741:1875810 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub098:1875741:1875810 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub098:1875741:1875810 [3] NCCL INFO comm 0x4ecd4ee0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub072:1805520:1805520 [1] NCCL INFO cudaDriverVersion 12010
+gpub072:1805520:1805520 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805520:1805520 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805520:1805604 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805520:1805604 [1] NCCL INFO Using network IB
+gpub072:1805520:1805604 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub072:1805520:1805604 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Connected all rings
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Connected all trees
+gpub072:1805520:1805604 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805520:1805604 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805520:1805604 [1] NCCL INFO comm 0xb6f41780 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub067:1574057:1574057 [3] NCCL INFO cudaDriverVersion 12010
+gpub067:1574057:1574057 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1574057:1574057 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1574057:1574132 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1574057:1574132 [3] NCCL INFO Using network IB
+gpub067:1574057:1574132 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub067:1574057:1574132 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38
+gpub067:1574057:1574132 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpub067:1574057:1574132 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpub067:1574057:1574132 [3] NCCL INFO Connected all rings
+gpub067:1574057:1574132 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC
+gpub067:1574057:1574132 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC
+gpub067:1574057:1574132 [3] NCCL INFO Connected all trees
+gpub067:1574057:1574132 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1574057:1574132 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1574057:1574132 [3] NCCL INFO comm 0x8d973650 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub014:1495257:1495257 [3] NCCL INFO cudaDriverVersion 12010
+gpub014:1495257:1495257 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495257:1495257 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495257:1495328 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495257:1495328 [3] NCCL INFO Using network IB
+gpub014:1495257:1495328 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub014:1495257:1495328 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpub014:1495257:1495328 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub014:1495257:1495328 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub014:1495257:1495328 [3] NCCL INFO Connected all rings
+gpub014:1495257:1495328 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub014:1495257:1495328 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub014:1495257:1495328 [3] NCCL INFO Connected all trees
+gpub014:1495257:1495328 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495257:1495328 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495257:1495328 [3] NCCL INFO comm 0x946a450 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub096:1645784:1645784 [0] NCCL INFO cudaDriverVersion 12010
+gpub096:1645784:1645784 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645784:1645784 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645784:1645856 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645784:1645856 [0] NCCL INFO Using network IB
+gpub096:1645784:1645856 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub096:1645784:1645856 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub096:1645784:1645856 [0] NCCL INFO Connected all rings
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Connected all trees
+gpub096:1645784:1645856 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645784:1645856 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645784:1645856 [0] NCCL INFO comm 0xcdcc14f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub012:1607821:1607821 [3] NCCL INFO cudaDriverVersion 12010
+gpub012:1607821:1607821 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607821:1607821 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607821:1607901 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607821:1607901 [3] NCCL INFO Using network IB
+gpub012:1607821:1607901 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub012:1607821:1607901 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpub012:1607821:1607901 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub012:1607821:1607901 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub012:1607821:1607901 [3] NCCL INFO Connected all rings
+gpub012:1607821:1607901 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub012:1607821:1607901 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub012:1607821:1607901 [3] NCCL INFO Connected all trees
+gpub012:1607821:1607901 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607821:1607901 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607821:1607901 [3] NCCL INFO comm 0x516c3430 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub041:1527385:1527385 [2] NCCL INFO cudaDriverVersion 12010
+gpub041:1527385:1527385 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527385:1527385 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527385:1527462 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527385:1527462 [2] NCCL INFO Using network IB
+gpub041:1527385:1527462 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub041:1527385:1527462 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33
+gpub041:1527385:1527462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Connected all rings
+gpub041:1527385:1527462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Connected all trees
+gpub041:1527385:1527462 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527385:1527462 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527385:1527462 [2] NCCL INFO comm 0x5082e9e0 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub041:1527384:1527384 [1] NCCL INFO cudaDriverVersion 12010
+gpub041:1527384:1527384 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527384:1527384 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527384:1527459 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527384:1527459 [1] NCCL INFO Using network IB
+gpub041:1527384:1527459 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub041:1527384:1527459 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Connected all rings
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Connected all trees
+gpub041:1527384:1527459 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527384:1527459 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527384:1527459 [1] NCCL INFO comm 0x512a04d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub041:1527383:1527383 [0] NCCL INFO cudaDriverVersion 12010
+gpub041:1527383:1527383 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527383:1527383 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527383:1527461 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527383:1527461 [0] NCCL INFO Using network IB
+gpub041:1527383:1527461 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub041:1527383:1527461 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub041:1527383:1527461 [0] NCCL INFO Connected all rings
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Connected all trees
+gpub041:1527383:1527461 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527383:1527461 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527383:1527461 [0] NCCL INFO comm 0x5103b480 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub018:1650756:1650756 [3] NCCL INFO cudaDriverVersion 12010
+gpub018:1650756:1650756 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650756:1650756 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650756:1650832 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650756:1650832 [3] NCCL INFO Using network IB
+gpub018:1650756:1650832 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub018:1650756:1650832 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpub018:1650756:1650832 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub018:1650756:1650832 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub018:1650756:1650832 [3] NCCL INFO Connected all rings
+gpub018:1650756:1650832 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub018:1650756:1650832 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub018:1650756:1650832 [3] NCCL INFO Connected all trees
+gpub018:1650756:1650832 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650756:1650832 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650756:1650832 [3] NCCL INFO comm 0x8c504da0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub084:4052710:4052710 [2] NCCL INFO cudaDriverVersion 12010
+gpub084:4052710:4052710 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052710:4052710 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052710:4052796 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052710:4052796 [2] NCCL INFO Using network IB
+gpub084:4052710:4052796 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub084:4052710:4052796 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpub084:4052710:4052796 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Connected all rings
+gpub084:4052710:4052796 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Connected all trees
+gpub084:4052710:4052796 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052710:4052796 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052710:4052796 [2] NCCL INFO comm 0x4f81fce0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub014:1495256:1495256 [2] NCCL INFO cudaDriverVersion 12010
+gpub014:1495256:1495256 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495256:1495256 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495256:1495330 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495256:1495330 [2] NCCL INFO Using network IB
+gpub014:1495256:1495330 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub014:1495256:1495330 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpub014:1495256:1495330 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Connected all rings
+gpub014:1495256:1495330 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub018:1650755:1650755 [2] NCCL INFO cudaDriverVersion 12010
+gpub018:1650755:1650755 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650755:1650755 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650755:1650833 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650755:1650833 [2] NCCL INFO Using network IB
+gpub018:1650755:1650833 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub018:1650755:1650833 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17
+gpub018:1650755:1650833 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub018:1650755:1650833 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub018:1650755:1650833 [2] NCCL INFO Connected all rings
+gpub018:1650755:1650833 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub018:1650755:1650833 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Connected all trees
+gpub014:1495256:1495330 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495256:1495330 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495256:1495330 [2] NCCL INFO comm 0x9f383a90 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub018:1650755:1650833 [2] NCCL INFO Connected all trees
+gpub018:1650755:1650833 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650755:1650833 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650755:1650833 [2] NCCL INFO comm 0x513374c0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub040:2093692:2093692 [2] NCCL INFO cudaDriverVersion 12010
+gpub040:2093692:2093692 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093692:2093692 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093692:2093775 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093692:2093775 [2] NCCL INFO Using network IB
+gpub040:2093692:2093775 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub040:2093692:2093775 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpub040:2093692:2093775 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Connected all rings
+gpub040:2093692:2093775 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub084:4052711:4052711 [3] NCCL INFO cudaDriverVersion 12010
+gpub084:4052711:4052711 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052711:4052711 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052711:4052795 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052711:4052795 [3] NCCL INFO Using network IB
+gpub084:4052711:4052795 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub084:4052711:4052795 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46
+gpub084:4052711:4052795 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub084:4052711:4052795 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub084:4052711:4052795 [3] NCCL INFO Connected all rings
+gpub084:4052711:4052795 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub084:4052711:4052795 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Connected all trees
+gpub040:2093692:2093775 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub040:2093692:2093775 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub040:2093692:2093775 [2] NCCL INFO comm 0x514bd130 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub084:4052711:4052795 [3] NCCL INFO Connected all trees
+gpub084:4052711:4052795 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052711:4052795 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052711:4052795 [3] NCCL INFO comm 0xa5710b50 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub072:1805519:1805519 [0] NCCL INFO cudaDriverVersion 12010
+gpub072:1805519:1805519 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805519:1805519 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805519:1805602 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805519:1805602 [0] NCCL INFO Using network IB
+gpub072:1805519:1805602 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub072:1805519:1805602 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub072:1805519:1805602 [0] NCCL INFO Connected all rings
+gpub012:1607819:1607819 [1] NCCL INFO cudaDriverVersion 12010
+gpub012:1607819:1607819 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607819:1607819 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607819:1607899 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607819:1607899 [1] NCCL INFO Using network IB
+gpub012:1607819:1607899 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub012:1607819:1607899 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpub012:1607819:1607899 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Connected all rings
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Connected all trees
+gpub072:1805519:1805602 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805519:1805602 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805519:1805602 [0] NCCL INFO comm 0x4fb13ad0 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub012:1607819:1607899 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Connected all trees
+gpub012:1607819:1607899 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607819:1607899 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607819:1607899 [1] NCCL INFO comm 0xa4ee840 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub039:2093177:2093177 [2] NCCL INFO cudaDriverVersion 12010
+gpub039:2093177:2093177 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093177:2093177 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093177:2093242 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093177:2093242 [2] NCCL INFO Using network IB
+gpub039:2093177:2093242 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub039:2093177:2093242 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpub039:2093177:2093242 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Connected all rings
+gpub039:2093177:2093242 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Connected all trees
+gpub039:2093177:2093242 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093177:2093242 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093177:2093242 [2] NCCL INFO comm 0xa965b10 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub097:1705871:1705871 [3] NCCL INFO cudaDriverVersion 12010
+gpub097:1705871:1705871 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705871:1705871 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705871:1705957 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705871:1705957 [3] NCCL INFO Using network IB
+gpub097:1705871:1705957 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub097:1705871:1705957 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58
+gpub097:1705871:1705957 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub097:1705871:1705957 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub097:1705871:1705957 [3] NCCL INFO Connected all rings
+gpub097:1705871:1705957 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub097:1705871:1705957 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub097:1705871:1705957 [3] NCCL INFO Connected all trees
+gpub097:1705871:1705957 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705871:1705957 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705871:1705957 [3] NCCL INFO comm 0x94d2db0 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub097:1705870:1705870 [2] NCCL INFO cudaDriverVersion 12010
+gpub097:1705870:1705870 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705870:1705870 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705870:1705956 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705870:1705956 [2] NCCL INFO Using network IB
+gpub097:1705870:1705956 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub097:1705870:1705956 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpub097:1705870:1705956 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub097:1705870:1705956 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub097:1705870:1705956 [2] NCCL INFO Connected all rings
+gpub097:1705870:1705956 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub097:1705870:1705956 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub039:2093175:2093175 [0] NCCL INFO cudaDriverVersion 12010
+gpub039:2093175:2093175 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093175:2093175 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093175:2093244 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093175:2093244 [0] NCCL INFO Using network IB
+gpub039:2093175:2093244 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub039:2093175:2093244 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub039:2093175:2093244 [0] NCCL INFO Connected all rings
+gpub097:1705870:1705956 [2] NCCL INFO Connected all trees
+gpub097:1705870:1705956 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705870:1705956 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705870:1705956 [2] NCCL INFO comm 0x50f117a0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Connected all trees
+gpub039:2093175:2093244 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093175:2093244 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093175:2093244 [0] NCCL INFO comm 0xa2cab60 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub041:1527386:1527386 [3] NCCL INFO cudaDriverVersion 12010
+gpub041:1527386:1527386 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527386:1527386 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527386:1527460 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527386:1527460 [3] NCCL INFO Using network IB
+gpub041:1527386:1527460 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub041:1527386:1527460 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34
+gpub041:1527386:1527460 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub041:1527386:1527460 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub041:1527386:1527460 [3] NCCL INFO Connected all rings
+gpub041:1527386:1527460 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub041:1527386:1527460 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub030:2531969:2531969 [0] NCCL INFO cudaDriverVersion 12010
+gpub030:2531969:2531969 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531969:2531969 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531969:2532049 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531969:2532049 [0] NCCL INFO Using network IB
+gpub030:2531969:2532049 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub030:2531969:2532049 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2531969:2532049 [0] NCCL INFO Connected all rings
+gpub041:1527386:1527460 [3] NCCL INFO Connected all trees
+gpub041:1527386:1527460 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527386:1527460 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527386:1527460 [3] NCCL INFO comm 0x4f979490 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub012:1607818:1607818 [0] NCCL INFO cudaDriverVersion 12010
+gpub012:1607818:1607818 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607818:1607818 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607818:1607902 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607818:1607902 [0] NCCL INFO Using network IB
+gpub012:1607818:1607902 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub012:1607818:1607902 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub012:1607818:1607902 [0] NCCL INFO Connected all rings
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Connected all trees
+gpub030:2531969:2532049 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531969:2532049 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531969:2532049 [0] NCCL INFO comm 0xb4f6de0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Connected all trees
+gpub012:1607818:1607902 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607818:1607902 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607818:1607902 [0] NCCL INFO comm 0xa8f3bc80 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub039:2093178:2093178 [3] NCCL INFO cudaDriverVersion 12010
+gpub039:2093178:2093178 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093178:2093178 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093178:2093243 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093178:2093243 [3] NCCL INFO Using network IB
+gpub039:2093178:2093243 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub039:2093178:2093243 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpub039:2093178:2093243 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub039:2093178:2093243 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub039:2093178:2093243 [3] NCCL INFO Connected all rings
+gpub039:2093178:2093243 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub039:2093178:2093243 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub039:2093178:2093243 [3] NCCL INFO Connected all trees
+gpub039:2093178:2093243 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093178:2093243 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093178:2093243 [3] NCCL INFO comm 0x4fc75960 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub030:2531970:2531970 [1] NCCL INFO cudaDriverVersion 12010
+gpub030:2531970:2531970 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531970:2531970 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531970:2532052 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531970:2532052 [1] NCCL INFO Using network IB
+gpub030:2531970:2532052 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub030:2531970:2532052 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpub030:2531970:2532052 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Connected all rings
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpub030:2531970:2532052 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Connected all trees
+gpub030:2531970:2532052 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531970:2532052 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531970:2532052 [1] NCCL INFO comm 0x8ebc3340 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub012:1607820:1607820 [2] NCCL INFO cudaDriverVersion 12010
+gpub012:1607820:1607820 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607820:1607820 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607820:1607900 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607820:1607900 [2] NCCL INFO Using network IB
+gpub012:1607820:1607900 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub012:1607820:1607900 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpub012:1607820:1607900 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub012:1607820:1607900 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub012:1607820:1607900 [2] NCCL INFO Connected all rings
+gpub012:1607820:1607900 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub012:1607820:1607900 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub072:1805522:1805522 [3] NCCL INFO cudaDriverVersion 12010
+gpub072:1805522:1805522 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805522:1805522 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805522:1805603 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805522:1805603 [3] NCCL INFO Using network IB
+gpub072:1805522:1805603 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub072:1805522:1805603 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpub072:1805522:1805603 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub072:1805522:1805603 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub072:1805522:1805603 [3] NCCL INFO Connected all rings
+gpub072:1805522:1805603 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub072:1805522:1805603 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub039:2093176:2093176 [1] NCCL INFO cudaDriverVersion 12010
+gpub039:2093176:2093176 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093176:2093176 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093176:2093245 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093176:2093245 [1] NCCL INFO Using network IB
+gpub039:2093176:2093245 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub039:2093176:2093245 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Connected all rings
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpub012:1607820:1607900 [2] NCCL INFO Connected all trees
+gpub012:1607820:1607900 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607820:1607900 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607820:1607900 [2] NCCL INFO comm 0x503f1430 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub072:1805522:1805603 [3] NCCL INFO Connected all trees
+gpub072:1805522:1805603 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805522:1805603 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805522:1805603 [3] NCCL INFO comm 0x50740450 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Connected all trees
+gpub039:2093176:2093245 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093176:2093245 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093176:2093245 [1] NCCL INFO comm 0xbcbaabd0 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub030:2531972:2531972 [3] NCCL INFO cudaDriverVersion 12010
+gpub030:2531972:2531972 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531972:2531972 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531972:2532050 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531972:2532050 [3] NCCL INFO Using network IB
+gpub030:2531972:2532050 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub030:2531972:2532050 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpub030:2531972:2532050 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub030:2531972:2532050 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub030:2531972:2532050 [3] NCCL INFO Connected all rings
+gpub030:2531972:2532050 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub030:2531972:2532050 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub030:2531972:2532050 [3] NCCL INFO Connected all trees
+gpub030:2531972:2532050 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531972:2532050 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531972:2532050 [3] NCCL INFO comm 0xa2cf1d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub018:1650754:1650754 [1] NCCL INFO cudaDriverVersion 12010
+gpub018:1650754:1650754 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650754:1650754 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650754:1650831 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650754:1650831 [1] NCCL INFO Using network IB
+gpub018:1650754:1650831 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub018:1650754:1650831 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Connected all rings
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Connected all trees
+gpub018:1650754:1650831 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650754:1650831 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650754:1650831 [1] NCCL INFO comm 0xa938d420 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub096:1645787:1645787 [3] NCCL INFO cudaDriverVersion 12010
+gpub096:1645787:1645787 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645787:1645787 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645787:1645858 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645787:1645858 [3] NCCL INFO Using network IB
+gpub096:1645787:1645858 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub096:1645787:1645858 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54
+gpub096:1645787:1645858 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpub096:1645787:1645858 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpub096:1645787:1645858 [3] NCCL INFO Connected all rings
+gpub096:1645787:1645858 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC
+gpub096:1645787:1645858 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC
+gpub096:1645787:1645858 [3] NCCL INFO Connected all trees
+gpub096:1645787:1645858 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645787:1645858 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645787:1645858 [3] NCCL INFO comm 0xb78b6390 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub018:1650753:1650753 [0] NCCL INFO cudaDriverVersion 12010
+gpub018:1650753:1650753 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650753:1650753 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650753:1650834 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650753:1650834 [0] NCCL INFO Using network IB
+gpub018:1650753:1650834 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub018:1650753:1650834 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub018:1650753:1650834 [0] NCCL INFO Connected all rings
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Connected all trees
+gpub018:1650753:1650834 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650753:1650834 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650753:1650834 [0] NCCL INFO comm 0x4f7a1b90 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub097:1705868:1705868 [0] NCCL INFO cudaDriverVersion 12010
+gpub097:1705868:1705868 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705868:1705868 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705868:1705958 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705868:1705958 [0] NCCL INFO Using network IB
+gpub097:1705868:1705958 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub097:1705868:1705958 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub097:1705868:1705958 [0] NCCL INFO Connected all rings
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Connected all trees
+gpub097:1705868:1705958 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705868:1705958 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705868:1705958 [0] NCCL INFO comm 0x4f565ad0 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub030:2531971:2531971 [2] NCCL INFO cudaDriverVersion 12010
+gpub030:2531971:2531971 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531971:2531971 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531971:2532051 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531971:2532051 [2] NCCL INFO Using network IB
+gpub030:2531971:2532051 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub030:2531971:2532051 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpub030:2531971:2532051 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Connected all rings
+gpub030:2531971:2532051 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Connected all trees
+gpub030:2531971:2532051 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531971:2532051 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531971:2532051 [2] NCCL INFO comm 0x8dd18cd0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub084:4052708:4052708 [0] NCCL INFO cudaDriverVersion 12010
+gpub084:4052708:4052708 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052708:4052708 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052708:4052794 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052708:4052794 [0] NCCL INFO Using network IB
+gpub084:4052708:4052794 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub084:4052708:4052794 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub084:4052708:4052794 [0] NCCL INFO Connected all rings
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Connected all trees
+gpub084:4052708:4052794 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052708:4052794 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052708:4052794 [0] NCCL INFO comm 0xb576c9d0 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub096:1645786:1645786 [2] NCCL INFO cudaDriverVersion 12010
+gpub096:1645786:1645786 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645786:1645786 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645786:1645857 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645786:1645857 [2] NCCL INFO Using network IB
+gpub096:1645786:1645857 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub096:1645786:1645857 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53
+gpub096:1645786:1645857 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Connected all rings
+gpub096:1645786:1645857 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Connected all trees
+gpub096:1645786:1645857 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645786:1645857 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645786:1645857 [2] NCCL INFO comm 0x4fe9cb90 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub097:1705869:1705869 [1] NCCL INFO cudaDriverVersion 12010
+gpub097:1705869:1705869 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705869:1705869 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705869:1705955 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705869:1705955 [1] NCCL INFO Using network IB
+gpub097:1705869:1705955 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub097:1705869:1705955 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub097:1705869:1705955 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub097:1705869:1705955 [1] NCCL INFO Connected all rings
+gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0
+gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0
+gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub097:1705869:1705955 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub097:1705869:1705955 [1] NCCL INFO Connected all trees
+gpub097:1705869:1705955 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705869:1705955 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705869:1705955 [1] NCCL INFO comm 0x8e89510 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub040:2093691:2093691 [1] NCCL INFO cudaDriverVersion 12010
+gpub040:2093691:2093691 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093691:2093691 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093691:2093774 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093691:2093774 [1] NCCL INFO Using network IB
+gpub040:2093691:2093774 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub040:2093691:2093774 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpub040:2093691:2093774 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub040:2093691:2093774 [1] NCCL INFO Connected all rings
+gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpub040:2093691:2093774 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub040:2093691:2093774 [1] NCCL INFO Connected all trees
+gpub040:2093691:2093774 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub040:2093691:2093774 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub040:2093691:2093774 [1] NCCL INFO comm 0xb9336880 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub096:1645785:1645785 [1] NCCL INFO cudaDriverVersion 12010
+gpub096:1645785:1645785 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645785:1645785 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645785:1645855 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645785:1645855 [1] NCCL INFO Using network IB
+gpub096:1645785:1645855 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub096:1645785:1645855 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52
+gpub096:1645785:1645855 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC
+gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC
+gpub096:1645785:1645855 [1] NCCL INFO Connected all rings
+gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0
+gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0
+gpub096:1645785:1645855 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC
+gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC
+gpub096:1645785:1645855 [1] NCCL INFO Connected all trees
+gpub096:1645785:1645855 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645785:1645855 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645785:1645855 [1] NCCL INFO comm 0x50f7e840 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub014:1495254:1495254 [0] NCCL INFO cudaDriverVersion 12010
+gpub014:1495254:1495254 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495254:1495254 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495254:1495329 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495254:1495329 [0] NCCL INFO Using network IB
+gpub014:1495254:1495329 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub014:1495254:1495329 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub014:1495254:1495329 [0] NCCL INFO Connected all rings
+gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Connected all trees
+gpub014:1495254:1495329 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495254:1495329 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495254:1495329 [0] NCCL INFO comm 0x50fe0a80 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub040:2093693:2093693 [3] NCCL INFO cudaDriverVersion 12010
+gpub040:2093693:2093693 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093693:2093693 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093693:2093773 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093693:2093773 [3] NCCL INFO Using network IB
+gpub040:2093693:2093773 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub040:2093693:2093773
[3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub040:2093693:2093773 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub040:2093693:2093773 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub040:2093693:2093773 [3] NCCL INFO Connected all rings +gpub040:2093693:2093773 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub040:2093693:2093773 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub040:2093693:2093773 [3] NCCL INFO Connected all trees +gpub040:2093693:2093773 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub040:2093693:2093773 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:2093693:2093773 [3] NCCL INFO comm 0xbd6eac10 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
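The NCCL INFO lines above trace communicator setup across all 64 ranks: bootstrap over eth1, IB/RoCE transport selection, ring and tree construction (P2P/IPC within a node, NET/IB across nodes), ending in one "Init COMPLETE" per rank. As a rough, minimal sketch of how such output is produced (assuming a srun/torchrun-style launch that provides RANK, WORLD_SIZE, LOCAL_RANK, and the rendezvous address; NCCL_DEBUG and NCCL_DEBUG_SUBSYS are standard NCCL environment variables, nothing specific to this recipe):

    import os
    import torch
    import torch.distributed as dist

    # Standard NCCL debug switches; they must be set before the first collective.
    # NCCL_DEBUG=INFO is what makes NCCL print bootstrap/ring/tree lines like the ones above.
    os.environ.setdefault("NCCL_DEBUG", "INFO")
    os.environ.setdefault("NCCL_DEBUG_SUBSYS", "INIT")

    # Assumes the launcher set RANK/WORLD_SIZE/MASTER_ADDR/MASTER_PORT/LOCAL_RANK.
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
    dist.barrier()  # the first collective triggers communicator init ("Init COMPLETE")
    dist.destroy_process_group()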
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
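The reducer.cpp warning above is kept once here; in the raw log it is emitted verbatim by every one of the DDP worker processes. It refers to the find_unused_parameters argument of torch.nn.parallel.DistributedDataParallel. A minimal sketch of the setting it suggests, using a placeholder model rather than the actual S2T network (ESPnet passes this flag through its trainer options rather than hand-constructing DDP like this):

    import os
    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP

    dist.init_process_group(backend="nccl")
    local_rank = int(os.environ["LOCAL_RANK"])  # set by the launcher
    torch.cuda.set_device(local_rank)

    model = torch.nn.Linear(80, 256).cuda(local_rank)  # placeholder for the S2T model
    ddp_model = DDP(
        model,
        device_ids=[local_rank],
        # The warning fires when this is True yet every parameter received a
        # gradient anyway; False skips the extra autograd-graph traversal per step.
        find_unused_parameters=False,
    )

Keeping the flag True remains correct when control flow really can leave parameters unused in some iterations, which the warning itself notes as a possible false positive.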
+[gpub005:0/64] 2023-07-07 20:14:03,233 (trainer:732) INFO: 23epoch:train:1-100batch: iter_time=1.234, forward_time=0.245, loss_ctc=73.225, loss_att=57.657, acc=0.705, loss=62.327, backward_time=1.047, grad_norm=105.098, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.184, optim0_lr0=7.714e-05, train_time=5.552 +[gpub005:0/64] 2023-07-07 20:16:18,832 (trainer:732) INFO: 23epoch:train:101-200batch: iter_time=1.217e-04, forward_time=0.142, loss_ctc=66.064, loss_att=55.513, acc=0.684, loss=58.678, backward_time=1.027, grad_norm=122.006, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.712e-05, train_time=2.713 +[gpub005:0/64] 2023-07-07 20:18:34,753 (trainer:732) INFO: 23epoch:train:201-300batch: iter_time=1.234e-04, forward_time=0.143, loss_ctc=92.044, loss_att=64.944, acc=0.703, loss=73.074, backward_time=1.025, grad_norm=143.859, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.711e-05, train_time=2.718 +[gpub005:0/64] 2023-07-07 20:20:49,996 (trainer:732) INFO: 23epoch:train:301-400batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=74.575, loss_att=60.629, acc=0.698, loss=64.813, backward_time=1.025, grad_norm=150.059, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.709e-05, train_time=2.705 +[gpub005:0/64] 2023-07-07 20:23:05,113 (trainer:732) INFO: 23epoch:train:401-500batch: iter_time=1.261e-04, forward_time=0.142, loss_ctc=80.331, loss_att=61.701, acc=0.708, loss=67.290, backward_time=1.024, grad_norm=120.003, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.707e-05, train_time=2.702 +[gpub005:0/64] 2023-07-07 20:25:19,924 (trainer:732) INFO: 23epoch:train:501-600batch: iter_time=1.247e-04, forward_time=0.142, loss_ctc=70.043, loss_att=53.869, acc=0.699, loss=58.721, backward_time=1.021, grad_norm=109.628, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.705e-05, train_time=2.696 +[gpub005:0/64] 2023-07-07 20:27:40,430 (trainer:732) INFO: 23epoch:train:601-700batch: iter_time=1.208e-04, forward_time=0.143, loss_ctc=83.967, loss_att=61.947, acc=0.692, loss=68.553, backward_time=1.033, grad_norm=141.886, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.703e-05, train_time=2.810 +[gpub005:0/64] 2023-07-07 20:30:06,403 (trainer:732) INFO: 23epoch:train:701-800batch: iter_time=1.237e-04, forward_time=0.142, loss_ctc=75.476, loss_att=56.916, acc=0.696, loss=62.484, backward_time=1.034, grad_norm=118.855, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.701e-05, train_time=2.919 +[gpub005:0/64] 2023-07-07 20:31:01,886 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
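In the per-100-batch trainer lines above, loss is consistent with ESPnet's hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3. The weight is inferred from the logged numbers, not read from the recipe's YAML, which remains authoritative. A quick check against the first entry:

    # Assumed hybrid objective: loss = ctc_weight*loss_ctc + (1-ctc_weight)*loss_att
    ctc_weight = 0.3                      # inferred from the log, not from the config
    loss_ctc, loss_att = 73.225, 57.657   # 23epoch:train:1-100batch above
    loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
    print(f"{loss:.3f}")                  # -> 62.327, matching the logged loss=62.327

The same weight reproduces the other entries as well, e.g. 0.3*66.064 + 0.7*55.513 = 58.678 for batches 101-200.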
+[gpub005:0/64] 2023-07-07 20:31:19,436 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 20:31:22,919 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 20:31:22,919 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-07 20:31:22,925 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 20:37:15,137 (trainer:732) INFO: 23epoch:train:801-900batch: iter_time=1.375, forward_time=0.172, loss_ctc=72.566, loss_att=53.768, acc=0.698, loss=59.408, backward_time=1.047, grad_norm=114.089, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.182, optim0_lr0=7.700e-05, train_time=8.574 +[gpub005:0/64] 2023-07-07 20:39:31,293 (trainer:732) INFO: 23epoch:train:901-1000batch: iter_time=1.203e-04, forward_time=0.146, loss_ctc=65.125, loss_att=51.474, acc=0.694, loss=55.569, backward_time=1.026, grad_norm=100.552, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.182, optim0_lr0=7.698e-05, train_time=2.724 +[gpub005:0/64] 2023-07-07 20:41:47,156 (trainer:732) INFO: 23epoch:train:1001-1100batch: iter_time=1.348e-04, forward_time=0.146, loss_ctc=83.846, loss_att=65.257, acc=0.696, loss=70.834, backward_time=1.027, grad_norm=117.681, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.696e-05, train_time=2.717 +[gpub005:0/64] 2023-07-07 20:44:02,949 (trainer:732) INFO: 23epoch:train:1101-1200batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=75.335, loss_att=57.448, acc=0.703, loss=62.814, backward_time=1.027, grad_norm=97.808, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.694e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 20:46:19,227 (trainer:732) INFO: 23epoch:train:1201-1300batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=78.643, loss_att=64.187, acc=0.709, loss=68.524, backward_time=1.029, grad_norm=106.806, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.692e-05, train_time=2.725 +[gpub005:0/64] 2023-07-07 20:48:36,388 (trainer:732) INFO: 23epoch:train:1301-1400batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=67.582, loss_att=50.486, acc=0.715, loss=55.615, backward_time=1.026, grad_norm=134.933, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.690e-05, train_time=2.743 +[gpub005:0/64] 2023-07-07 20:50:52,551 (trainer:732) INFO: 23epoch:train:1401-1500batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=80.110, loss_att=60.648, acc=0.688, loss=66.487, backward_time=1.028, grad_norm=128.046, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.689e-05, train_time=2.723 +[gpub005:0/64] 2023-07-07 20:53:11,035 (trainer:732) INFO: 23epoch:train:1501-1600batch: iter_time=1.158e-04, forward_time=0.144, loss_ctc=70.534, loss_att=57.154, acc=0.699, loss=61.168, backward_time=1.027, grad_norm=102.436, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, 
optim0_lr0=7.687e-05, train_time=2.769 +[gpub005:0/64] 2023-07-07 20:54:56,286 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-07 20:55:14,037 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 20:55:17,531 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 20:55:17,531 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-07 20:55:17,538 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 21:00:02,783 (trainer:732) INFO: 23epoch:train:1601-1700batch: iter_time=2.623, forward_time=0.186, loss_ctc=70.658, loss_att=52.058, acc=0.699, loss=57.638, backward_time=1.034, grad_norm=90.957, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.183, optim0_lr0=7.685e-05, train_time=8.235 +[gpub005:0/64] 2023-07-07 21:02:18,930 (trainer:732) INFO: 23epoch:train:1701-1800batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=70.962, loss_att=57.990, acc=0.707, loss=61.882, backward_time=1.026, grad_norm=106.925, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.683e-05, train_time=2.723 +[gpub005:0/64] 2023-07-07 21:04:34,887 (trainer:732) INFO: 23epoch:train:1801-1900batch: iter_time=1.197e-04, forward_time=0.143, loss_ctc=73.987, loss_att=55.470, acc=0.695, loss=61.025, backward_time=1.025, grad_norm=128.929, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.681e-05, train_time=2.719 +[gpub005:0/64] 2023-07-07 21:06:50,806 (trainer:732) INFO: 23epoch:train:1901-2000batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=84.894, loss_att=66.262, acc=0.695, loss=71.851, backward_time=1.024, grad_norm=114.319, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.680e-05, train_time=2.718 +[gpub005:0/64] 2023-07-07 21:09:06,612 (trainer:732) INFO: 23epoch:train:2001-2100batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=72.489, loss_att=59.553, acc=0.708, loss=63.434, backward_time=1.026, grad_norm=101.371, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.678e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 21:11:22,119 (trainer:732) INFO: 23epoch:train:2101-2200batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=71.626, loss_att=54.390, acc=0.708, loss=59.561, backward_time=1.025, grad_norm=107.121, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.676e-05, train_time=2.710 +[gpub005:0/64] 2023-07-07 21:13:37,533 (trainer:732) INFO: 23epoch:train:2201-2300batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=79.447, loss_att=59.205, acc=0.696, loss=65.278, backward_time=1.023, grad_norm=121.579, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.674e-05, train_time=2.708 +[gpub005:0/64] 2023-07-07 21:15:53,132 (trainer:732) INFO: 23epoch:train:2301-2400batch: iter_time=1.182e-04, forward_time=0.144, loss_ctc=75.627, loss_att=58.332, 
acc=0.698, loss=63.520, backward_time=1.024, grad_norm=126.568, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.672e-05, train_time=2.712
+[gpub005:0/64] 2023-07-07 21:18:08,467 (trainer:732) INFO: 23epoch:train:2401-2500batch: iter_time=1.198e-04, forward_time=0.144, loss_ctc=65.804, loss_att=53.629, acc=0.695, loss=57.282, backward_time=1.023, grad_norm=93.403, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.670e-05, train_time=2.706
+[gpub005:0/64] 2023-07-07 21:18:11,282 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-07 21:18:29,130 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 21:18:32,569 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 21:18:32,569 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-07 21:18:32,576 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 21:24:42,763 (trainer:732) INFO: 23epoch:train:2501-2600batch: iter_time=1.242, forward_time=0.174, loss_ctc=73.983, loss_att=58.373, acc=0.706, loss=63.056, backward_time=1.034, grad_norm=100.591, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.183, optim0_lr0=7.669e-05, train_time=7.886
+[gpub005:0/64] 2023-07-07 21:26:58,297 (trainer:732) INFO: 23epoch:train:2601-2700batch: iter_time=1.190e-04, forward_time=0.144, loss_ctc=64.347, loss_att=52.941, acc=0.692, loss=56.362, backward_time=1.022, grad_norm=103.514, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.667e-05, train_time=2.710
+[gpub005:0/64] 2023-07-07 21:29:14,466 (trainer:732) INFO: 23epoch:train:2701-2800batch: iter_time=1.356e-04, forward_time=0.146, loss_ctc=86.482, loss_att=62.946, acc=0.706, loss=70.007, backward_time=1.028, grad_norm=116.532, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.665e-05, train_time=2.723
+[gpub005:0/64] 2023-07-07 21:31:30,282 (trainer:732) INFO: 23epoch:train:2801-2900batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=75.022, loss_att=60.111, acc=0.700, loss=64.585, backward_time=1.027, grad_norm=102.153, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.663e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 21:33:45,972 (trainer:732) INFO: 23epoch:train:2901-3000batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=79.575, loss_att=61.287, acc=0.710, loss=66.773, backward_time=1.024, grad_norm=116.778, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.661e-05, train_time=2.714
+[gpub005:0/64] 2023-07-07 21:36:01,542 (trainer:732) INFO: 23epoch:train:3001-3100batch: iter_time=1.329e-04, forward_time=0.144, loss_ctc=66.483, loss_att=51.229, acc=0.704, loss=55.805, backward_time=1.024, grad_norm=92.400, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.660e-05, train_time=2.711
+[gpub005:0/64] 2023-07-07 21:38:17,343 (trainer:732) INFO: 23epoch:train:3101-3200batch: iter_time=1.364e-04, forward_time=0.146, loss_ctc=78.366, loss_att=58.791, acc=0.696, loss=64.664, backward_time=1.027, grad_norm=123.479, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.658e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 21:40:32,848 (trainer:732) INFO: 23epoch:train:3201-3300batch: iter_time=1.406e-04, forward_time=0.144, loss_ctc=70.759, loss_att=54.241, acc=0.709, loss=59.197, backward_time=1.024, grad_norm=101.288, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.656e-05, train_time=2.710
+[gpub005:0/64] 2023-07-07 21:41:20,432 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-07 21:41:38,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 21:41:42,003 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 21:41:42,004 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-07 21:41:42,010 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 21:48:03,969 (trainer:732) INFO: 23epoch:train:3301-3400batch: iter_time=1.232, forward_time=0.144, loss_ctc=68.130, loss_att=52.958, acc=0.698, loss=57.509, backward_time=1.041, grad_norm=100.963, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.180, optim0_lr0=7.654e-05, train_time=9.022
+[gpub005:0/64] 2023-07-07 21:50:20,129 (trainer:732) INFO: 23epoch:train:3401-3500batch: iter_time=1.270e-04, forward_time=0.144, loss_ctc=69.763, loss_att=54.215, acc=0.711, loss=58.879, backward_time=1.024, grad_norm=98.620, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.653e-05, train_time=2.723
+[gpub005:0/64] 2023-07-07 21:52:37,246 (trainer:732) INFO: 23epoch:train:3501-3600batch: iter_time=1.350e-04, forward_time=0.146, loss_ctc=72.406, loss_att=56.129, acc=0.702, loss=61.013, backward_time=1.029, grad_norm=107.814, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.651e-05, train_time=2.742
+[gpub005:0/64] 2023-07-07 21:54:53,208 (trainer:732) INFO: 23epoch:train:3601-3700batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=85.578, loss_att=64.328, acc=0.701, loss=70.703, backward_time=1.026, grad_norm=95.956, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.649e-05, train_time=2.719
+[gpub005:0/64] 2023-07-07 21:57:09,044 (trainer:732) INFO: 23epoch:train:3701-3800batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=74.007, loss_att=59.821, acc=0.714, loss=64.077, backward_time=1.026, grad_norm=88.845, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.647e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 21:59:39,441 (trainer:732) INFO: 23epoch:train:3801-3900batch: iter_time=1.029e-04, forward_time=0.143, loss_ctc=67.054, loss_att=49.799, acc=0.712, loss=54.975, backward_time=1.033, grad_norm=97.210, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.645e-05, train_time=3.008
+[gpub005:0/64] 2023-07-07 22:01:56,581 (trainer:732) INFO: 23epoch:train:3901-4000batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=78.400, loss_att=56.731, acc=0.705, loss=63.232, backward_time=1.027, grad_norm=108.203, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.644e-05, train_time=2.743
+[gpub005:0/64] 2023-07-07 22:04:12,336 (trainer:732) INFO: 23epoch:train:4001-4100batch: iter_time=1.080e-04, forward_time=0.144, loss_ctc=74.332, loss_att=57.767, acc=0.700, loss=62.737, backward_time=1.025, grad_norm=119.370, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.642e-05, train_time=2.715
+[gpub005:0/64] 2023-07-07 22:05:44,331 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-07 22:06:02,568 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 22:06:05,982 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 22:06:05,982 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-07 22:06:05,988 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 22:10:46,670 (trainer:732) INFO: 23epoch:train:4101-4200batch: iter_time=1.259, forward_time=0.154, loss_ctc=66.157, loss_att=48.178, acc=0.703, loss=53.572, backward_time=1.035, grad_norm=89.639, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.640e-05, train_time=7.886
+[gpub005:0/64] 2023-07-07 22:13:02,991 (trainer:732) INFO: 23epoch:train:4201-4300batch: iter_time=1.279e-04, forward_time=0.144, loss_ctc=69.203, loss_att=58.283, acc=0.704, loss=61.559, backward_time=1.026, grad_norm=86.447, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.638e-05, train_time=2.727
+[gpub005:0/64] 2023-07-07 22:15:18,474 (trainer:732) INFO: 23epoch:train:4301-4400batch: iter_time=1.017e-04, forward_time=0.143, loss_ctc=71.569, loss_att=54.837, acc=0.692, loss=59.857, backward_time=1.023, grad_norm=112.955, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.636e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 22:17:33,912 (trainer:732) INFO: 23epoch:train:4401-4500batch: iter_time=1.015e-04, forward_time=0.143, loss_ctc=83.345, loss_att=66.902, acc=0.686, loss=71.835, backward_time=1.023, grad_norm=107.092, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.635e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 22:19:49,701 (trainer:732) INFO: 23epoch:train:4501-4600batch: iter_time=1.031e-04, forward_time=0.144, loss_ctc=72.806, loss_att=58.981, acc=0.706, loss=63.129, backward_time=1.024, grad_norm=108.783, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.633e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 22:22:05,139 (trainer:732) INFO: 23epoch:train:4601-4700batch: iter_time=1.114e-04, forward_time=0.143, loss_ctc=71.685, loss_att=55.501, acc=0.700, loss=60.356, backward_time=1.022, grad_norm=110.110, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.631e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 22:24:20,485 (trainer:732) INFO: 23epoch:train:4701-4800batch: iter_time=1.072e-04, forward_time=0.143, loss_ctc=79.087, loss_att=57.787, acc=0.699, loss=64.177, backward_time=1.021, grad_norm=100.805, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.629e-05, train_time=2.707
+[gpub005:0/64] 2023-07-07 22:26:35,978 (trainer:732) INFO: 23epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.144, loss_ctc=73.503, loss_att=58.364, acc=0.689, loss=62.906, backward_time=1.023, grad_norm=109.600, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.628e-05, train_time=2.710
+[gpub005:0/64] 2023-07-07 22:28:51,076 (trainer:732) INFO: 23epoch:train:4901-5000batch: iter_time=1.202e-04, forward_time=0.143, loss_ctc=64.689, loss_att=54.362, acc=0.690, loss=57.460, backward_time=1.021, grad_norm=152.209, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.626e-05, train_time=2.702
+[gpub005:0/64] 2023-07-07 22:28:55,889 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-07 22:29:14,276 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 22:29:17,689 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 22:29:17,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-07 22:29:17,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 22:37:00,414 (trainer:732) INFO: 23epoch:train:5001-5100batch: iter_time=1.337, forward_time=0.170, loss_ctc=73.081, loss_att=56.227, acc=0.704, loss=61.283, backward_time=1.034, grad_norm=153.240, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.624e-05, train_time=9.787
+[gpub005:0/64] 2023-07-07 22:39:16,136 (trainer:732) INFO: 23epoch:train:5101-5200batch: iter_time=1.064e-04, forward_time=0.145, loss_ctc=64.449, loss_att=53.017, acc=0.688, loss=56.447, backward_time=1.022, grad_norm=97.208, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.622e-05, train_time=2.714
+[gpub005:0/64] 2023-07-07 22:41:31,530 (trainer:732) INFO: 23epoch:train:5201-5300batch: iter_time=1.159e-04, forward_time=0.144, loss_ctc=84.647, loss_att=63.396, acc=0.696, loss=69.772, backward_time=1.022, grad_norm=129.990, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.620e-05, train_time=2.708
+[gpub005:0/64] 2023-07-07 22:43:47,077 (trainer:732) INFO: 23epoch:train:5301-5400batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=71.859, loss_att=58.614, acc=0.699, loss=62.588, backward_time=1.024, grad_norm=94.894, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.619e-05, train_time=2.711
+[gpub005:0/64] 2023-07-07 22:46:02,833 (trainer:732) INFO: 23epoch:train:5401-5500batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=77.229, loss_att=59.686, acc=0.711, loss=64.949, backward_time=1.026, grad_norm=107.512, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.182, optim0_lr0=7.617e-05, train_time=2.715
+[gpub005:0/64] 2023-07-07 22:48:18,143 (trainer:732) INFO: 23epoch:train:5501-5600batch: iter_time=1.116e-04, forward_time=0.144, loss_ctc=67.891, loss_att=54.609, acc=0.697, loss=58.593, backward_time=1.023, grad_norm=102.083, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.615e-05, train_time=2.706
+[gpub005:0/64] 2023-07-07 22:50:34,120 (trainer:732) INFO: 23epoch:train:5601-5700batch: iter_time=1.281e-04, forward_time=0.147, loss_ctc=77.159, loss_att=58.325, acc=0.695, loss=63.975, backward_time=1.027, grad_norm=116.273, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.613e-05, train_time=2.719
+[gpub005:0/64] 2023-07-07 22:52:49,277 (trainer:732) INFO: 23epoch:train:5701-5800batch: iter_time=1.458e-04, forward_time=0.144, loss_ctc=71.209, loss_att=54.625, acc=0.696, loss=59.601, backward_time=1.023, grad_norm=92.611, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.612e-05, train_time=2.703
+[gpub005:0/64] 2023-07-07 22:53:48,189 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-07 22:54:06,512 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 22:54:10,013 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 22:54:10,013 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-07 22:54:10,019 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 22:58:51,643 (trainer:732) INFO: 23epoch:train:5801-5900batch: iter_time=2.200, forward_time=0.169, loss_ctc=67.539, loss_att=50.843, acc=0.696, loss=55.852, backward_time=1.035, grad_norm=101.336, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.183, optim0_lr0=7.610e-05, train_time=7.247
+[gpub005:0/64] 2023-07-07 23:01:06,844 (trainer:732) INFO: 23epoch:train:5901-6000batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=69.923, loss_att=54.611, acc=0.709, loss=59.204, backward_time=1.021, grad_norm=89.503, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.608e-05, train_time=2.704
+[gpub005:0/64] 2023-07-07 23:03:22,210 (trainer:732) INFO: 23epoch:train:6001-6100batch: iter_time=1.177e-04, forward_time=0.143, loss_ctc=73.309, loss_att=56.219, acc=0.695, loss=61.346, backward_time=1.023, grad_norm=108.599, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.606e-05, train_time=2.707
+[gpub005:0/64] 2023-07-07 23:05:37,636 (trainer:732) INFO: 23epoch:train:6101-6200batch: iter_time=1.113e-04, forward_time=0.144, loss_ctc=82.117, loss_att=62.696, acc=0.695, loss=68.522, backward_time=1.023, grad_norm=97.686, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.605e-05, train_time=2.708
+[gpub005:0/64] 2023-07-07 23:07:53,122 (trainer:732) INFO: 23epoch:train:6201-6300batch: iter_time=1.115e-04, forward_time=0.143, loss_ctc=75.087, loss_att=60.028, acc=0.711, loss=64.546, backward_time=1.024, grad_norm=97.876, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.603e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 23:10:08,013 (trainer:732) INFO: 23epoch:train:6301-6400batch: iter_time=1.160e-04, forward_time=0.142, loss_ctc=66.502, loss_att=51.440, acc=0.704, loss=55.959, backward_time=1.020, grad_norm=103.605, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.601e-05, train_time=2.698
+[gpub005:0/64] 2023-07-07 23:12:24,456 (trainer:732) INFO: 23epoch:train:6401-6500batch: iter_time=1.169e-04, forward_time=0.143, loss_ctc=78.370, loss_att=56.212, acc=0.703, loss=62.859, backward_time=1.026, grad_norm=104.276, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.599e-05, train_time=2.729
+[gpub005:0/64] 2023-07-07 23:14:39,768 (trainer:732) INFO: 23epoch:train:6501-6600batch: iter_time=1.029e-04, forward_time=0.143, loss_ctc=75.661, loss_att=58.381, acc=0.689, loss=63.565, backward_time=1.022, grad_norm=120.247, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.180, optim0_lr0=7.598e-05, train_time=2.706
+[gpub005:0/64] 2023-07-07 23:16:16,616 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-07 23:16:34,573 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 23:16:37,987 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 23:16:37,987 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-07 23:16:37,993 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 23:21:16,629 (trainer:732) INFO: 23epoch:train:6601-6700batch: iter_time=1.778, forward_time=0.153, loss_ctc=64.231, loss_att=49.639, acc=0.702, loss=54.016, backward_time=1.033, grad_norm=111.751, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.596e-05, train_time=7.937
+[gpub005:0/64] 2023-07-07 23:23:32,863 (trainer:732) INFO: 23epoch:train:6701-6800batch: iter_time=1.291e-04, forward_time=0.144, loss_ctc=69.473, loss_att=58.857, acc=0.712, loss=62.042, backward_time=1.027, grad_norm=95.279, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.594e-05, train_time=2.724
+[gpub005:0/64] 2023-07-07 23:25:48,593 (trainer:732) INFO: 23epoch:train:6801-6900batch: iter_time=1.190e-04, forward_time=0.145, loss_ctc=67.596, loss_att=53.379, acc=0.699, loss=57.644, backward_time=1.025, grad_norm=117.466, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.592e-05, train_time=2.714
+[gpub005:0/64] 2023-07-07 23:28:04,655 (trainer:732) INFO: 23epoch:train:6901-7000batch: iter_time=1.241e-04, forward_time=0.145, loss_ctc=86.976, loss_att=65.598, acc=0.698, loss=72.012, backward_time=1.028, grad_norm=129.974, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.591e-05, train_time=2.721
+[gpub005:0/64] 2023-07-07 23:30:20,292 (trainer:732) INFO: 23epoch:train:7001-7100batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=67.406, loss_att=49.368, acc=0.731, loss=54.780, backward_time=1.026, grad_norm=112.193, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.589e-05, train_time=2.713
+[gpub005:0/64] 2023-07-07 23:32:36,124 (trainer:732) INFO: 23epoch:train:7101-7200batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=76.719, loss_att=60.990, acc=0.711, loss=65.708, backward_time=1.026, grad_norm=110.570, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.587e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 23:34:51,928 (trainer:732) INFO: 23epoch:train:7201-7300batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=74.869, loss_att=56.094, acc=0.706, loss=61.727, backward_time=1.025, grad_norm=110.042, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.585e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 23:37:07,590 (trainer:732) INFO: 23epoch:train:7301-7400batch: iter_time=1.190e-04, forward_time=0.144, loss_ctc=73.682, loss_att=57.404, acc=0.694, loss=62.287, backward_time=1.025, grad_norm=131.163, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.584e-05, train_time=2.713
+[gpub005:0/64] 2023-07-07 23:39:22,887 (trainer:732) INFO: 23epoch:train:7401-7500batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=66.339, loss_att=49.969, acc=0.710, loss=54.880, backward_time=1.023, grad_norm=103.995, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.582e-05, train_time=2.706
+[gpub005:0/64] 2023-07-07 23:39:24,295 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-07 23:39:42,594 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 23:39:46,092 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 23:39:46,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub005:0/64] 2023-07-07 23:39:46,098 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 23:45:40,419 (trainer:732) INFO: 23epoch:train:7501-7600batch: iter_time=1.251, forward_time=0.163, loss_ctc=71.968, loss_att=56.021, acc=0.710, loss=60.805, backward_time=1.036, grad_norm=115.094, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.580e-05, train_time=7.550
+[gpub005:0/64] 2023-07-07 23:47:56,185 (trainer:732) INFO: 23epoch:train:7601-7700batch: iter_time=1.157e-04, forward_time=0.144, loss_ctc=64.028, loss_att=53.023, acc=0.693, loss=56.324, backward_time=1.023, grad_norm=96.768, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.578e-05, train_time=2.715
+[gpub005:0/64] 2023-07-07 23:50:12,012 (trainer:732) INFO: 23epoch:train:7701-7800batch: iter_time=1.221e-04, forward_time=0.144, loss_ctc=82.929, loss_att=60.230, acc=0.703, loss=67.040, backward_time=1.027, grad_norm=101.875, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.577e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 23:52:27,130 (trainer:732) INFO: 23epoch:train:7801-7900batch: iter_time=1.379e-04, forward_time=0.144, loss_ctc=73.151, loss_att=59.489, acc=0.697, loss=63.588, backward_time=1.021, grad_norm=94.433, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.575e-05, train_time=2.702
+[gpub005:0/64] 2023-07-07 23:54:42,527 (trainer:732) INFO: 23epoch:train:7901-8000batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=77.527, loss_att=59.793, acc=0.707, loss=65.113, backward_time=1.024, grad_norm=97.630, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.573e-05, train_time=2.708
+[gpub005:0/64] 2023-07-07 23:56:57,972 (trainer:732) INFO: 23epoch:train:8001-8100batch: iter_time=1.089e-04, forward_time=0.143, loss_ctc=67.062, loss_att=52.670, acc=0.703, loss=56.987, backward_time=1.024, grad_norm=91.986, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.571e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 23:59:13,305 (trainer:732) INFO: 23epoch:train:8101-8200batch: iter_time=1.038e-04, forward_time=0.144, loss_ctc=77.397, loss_att=56.849, acc=0.699, loss=63.014, backward_time=1.023, grad_norm=101.149, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.570e-05, train_time=2.706
+[gpub005:0/64] 2023-07-08 00:01:28,395 (trainer:732) INFO: 23epoch:train:8201-8300batch: iter_time=1.169e-04, forward_time=0.143, loss_ctc=70.457, loss_att=54.556, acc=0.695, loss=59.327, backward_time=1.022, grad_norm=87.648, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.568e-05, train_time=2.702
+[gpub005:0/64] 2023-07-08 00:02:14,379 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-08 00:02:33,090 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 00:02:36,612 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 00:02:36,612 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub005:0/64] 2023-07-08 00:02:36,618 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 00:08:27,251 (trainer:732) INFO: 23epoch:train:8301-8400batch: iter_time=1.283, forward_time=0.155, loss_ctc=69.792, loss_att=51.702, acc=0.705, loss=57.129, backward_time=1.036, grad_norm=94.302, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.183, optim0_lr0=7.566e-05, train_time=8.377
+[gpub005:0/64] 2023-07-08 00:10:43,043 (trainer:732) INFO: 23epoch:train:8401-8500batch: iter_time=1.183e-04, forward_time=0.143, loss_ctc=64.409, loss_att=49.778, acc=0.698, loss=54.167, backward_time=1.024, grad_norm=88.422, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.564e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 00:12:58,756 (trainer:732) INFO: 23epoch:train:8501-8600batch: iter_time=1.104e-04, forward_time=0.143, loss_ctc=80.753, loss_att=62.383, acc=0.696, loss=67.894, backward_time=1.023, grad_norm=120.710, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.563e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 00:15:15,421 (trainer:732) INFO: 23epoch:train:8601-8700batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=72.795, loss_att=55.598, acc=0.703, loss=60.757, backward_time=1.026, grad_norm=94.499, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.561e-05, train_time=2.733
+[gpub005:0/64] 2023-07-08 00:17:31,354 (trainer:732) INFO: 23epoch:train:8701-8800batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=76.197, loss_att=62.131, acc=0.706, loss=66.351, backward_time=1.028, grad_norm=103.489, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.559e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 00:19:46,862 (trainer:732) INFO: 23epoch:train:8801-8900batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=66.628, loss_att=51.943, acc=0.710, loss=56.349, backward_time=1.025, grad_norm=99.280, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.558e-05, train_time=2.710
+[gpub005:0/64] 2023-07-08 00:22:10,051 (trainer:732) INFO: 23epoch:train:8901-9000batch: iter_time=1.057e-04, forward_time=0.145, loss_ctc=76.575, loss_att=56.267, acc=0.696, loss=62.360, backward_time=1.039, grad_norm=108.949, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.556e-05, train_time=2.864
+[gpub005:0/64] 2023-07-08 00:24:25,632 (trainer:732) INFO: 23epoch:train:9001-9100batch: iter_time=9.660e-05, forward_time=0.144, loss_ctc=69.003, loss_att=54.450, acc=0.697, loss=58.816, backward_time=1.024, grad_norm=89.585, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.554e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 00:25:57,557 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-08 00:26:15,971 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 00:26:19,431 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 00:26:19,432 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 00:26:19,438 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 00:30:19,119 (trainer:732) INFO: 23epoch:train:9101-9200batch: iter_time=1.292, forward_time=0.171, loss_ctc=67.027, loss_att=52.830, acc=0.700, loss=57.089, backward_time=1.038, grad_norm=110.918, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.552e-05, train_time=7.069
+[gpub005:0/64] 2023-07-08 00:32:58,750 (trainer:732) INFO: 23epoch:train:9201-9300batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=68.474, loss_att=55.236, acc=0.708, loss=59.208, backward_time=1.045, grad_norm=95.325, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.551e-05, train_time=3.193
+[gpub005:0/64] 2023-07-08 00:35:17,283 (trainer:732) INFO: 23epoch:train:9301-9400batch: iter_time=1.271e-04, forward_time=0.144, loss_ctc=67.455, loss_att=52.020, acc=0.693, loss=56.651, backward_time=1.026, grad_norm=96.518, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.549e-05, train_time=2.770
+[gpub005:0/64] 2023-07-08 00:37:36,935 (trainer:732) INFO: 23epoch:train:9401-9500batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=85.642, loss_att=64.226, acc=0.699, loss=70.651, backward_time=1.034, grad_norm=105.352, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.547e-05, train_time=2.793
+[gpub005:0/64] 2023-07-08 00:40:08,217 (trainer:732) INFO: 23epoch:train:9501-9600batch: iter_time=1.427e-04, forward_time=0.143, loss_ctc=65.434, loss_att=49.351, acc=0.723, loss=54.176, backward_time=1.050, grad_norm=100.205, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.545e-05, train_time=3.025
+[gpub005:0/64] 2023-07-08 00:42:32,011 (trainer:732) INFO: 23epoch:train:9601-9700batch: iter_time=1.393e-04, forward_time=0.145, loss_ctc=75.234, loss_att=58.800, acc=0.713, loss=63.730, backward_time=1.038, grad_norm=121.320, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.544e-05, train_time=2.876
+[gpub005:0/64] 2023-07-08 00:44:48,244 (trainer:732) INFO: 23epoch:train:9701-9800batch: iter_time=1.236e-04, forward_time=0.143, loss_ctc=74.479, loss_att=58.067, acc=0.696, loss=62.991, backward_time=1.022, grad_norm=101.161, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.542e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 00:47:03,649 (trainer:732) INFO: 23epoch:train:9801-9900batch: iter_time=1.282e-04, forward_time=0.144, loss_ctc=72.052, loss_att=55.612, acc=0.692, loss=60.544, backward_time=1.023, grad_norm=96.421, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.540e-05, train_time=2.708
+[gpub005:0/64] 2023-07-08 00:49:18,598 (trainer:732) INFO: 23epoch:train:9901-10000batch: iter_time=1.172e-04, forward_time=0.143, loss_ctc=65.433, loss_att=49.258, acc=0.705, loss=54.110, backward_time=1.020, grad_norm=96.484, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.180, optim0_lr0=7.539e-05, train_time=2.699
+[gpub005:0/64] 2023-07-08 01:02:22,452 (trainer:338) INFO: 23epoch results: [train] iter_time=0.181, forward_time=0.147, loss_ctc=73.369, loss_att=56.779, acc=0.701, loss=61.756, backward_time=1.027, grad_norm=108.052, clip=100.000, loss_scale=7.674e+20, optim_step_time=0.181, optim0_lr0=7.625e-05, train_time=3.358, time=4 hours, 40 minutes and 12.58 seconds, total_count=200000, gpu_max_cached_mem_GB=34.934, [valid] loss_ctc=49.622, cer_ctc=0.280, loss_att=39.678, acc=0.677, cer=0.355, wer=0.989, loss=42.661, time=6 minutes and 51.55 seconds, total_count=20746, gpu_max_cached_mem_GB=38.229, [att_plot] time=5 minutes and 52.75 seconds, total_count=0, gpu_max_cached_mem_GB=38.229
+[gpub005:0/64] 2023-07-08 01:02:38,315 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpub005:0/64] 2023-07-08 01:02:38,323 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/15epoch.pth
+[gpub005:0/64] 2023-07-08 01:02:38,323 (trainer:272) INFO: 24/30epoch started. Estimated time to finish: 1 day, 10 hours and 12 minutes
+[gpub005:0/64] 2023-07-08 01:02:38,327 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-08 01:02:56,639 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 01:03:00,105 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 01:03:00,105 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-08 01:03:00,112 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 01:07:13,542 (trainer:732) INFO: 24epoch:train:1-100batch: iter_time=1.294, forward_time=0.197, loss_ctc=66.378, loss_att=54.002, acc=0.692, loss=57.715, backward_time=1.047, grad_norm=88.560, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.185, optim0_lr0=7.537e-05, train_time=5.504
+[gpub005:0/64] 2023-07-08 01:09:29,832 (trainer:732) INFO: 24epoch:train:101-200batch: iter_time=1.237e-04, forward_time=0.149, loss_ctc=75.563, loss_att=57.087, acc=0.702, loss=62.630, backward_time=1.029, grad_norm=107.763, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.535e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 01:11:47,280 (trainer:732) INFO: 24epoch:train:201-300batch: iter_time=1.254e-04, forward_time=0.156, loss_ctc=81.651, loss_att=60.133, acc=0.708, loss=66.588, backward_time=1.031, grad_norm=101.534, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.185, optim0_lr0=7.533e-05, train_time=2.749
+[gpub005:0/64] 2023-07-08 01:14:08,521 (trainer:732) INFO: 24epoch:train:301-400batch: iter_time=1.208e-04, forward_time=0.151, loss_ctc=74.333, loss_att=59.376, acc=0.682, loss=63.863, backward_time=1.045, grad_norm=90.752, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.186, optim0_lr0=7.532e-05, train_time=2.825
+[gpub005:0/64] 2023-07-08 01:16:26,875 (trainer:732) INFO: 24epoch:train:401-500batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=76.016, loss_att=57.684, acc=0.693, loss=63.183, backward_time=1.029, grad_norm=111.603, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.530e-05, train_time=2.767
+[gpub005:0/64] 2023-07-08 01:18:46,441 (trainer:732) INFO: 24epoch:train:501-600batch: iter_time=1.166e-04, forward_time=0.145, loss_ctc=75.027, loss_att=55.807, acc=0.692, loss=61.573, backward_time=1.033, grad_norm=97.334, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.528e-05, train_time=2.791
+[gpub005:0/64] 2023-07-08 01:21:21,783 (trainer:732) INFO: 24epoch:train:601-700batch: iter_time=1.134e-04, forward_time=0.168, loss_ctc=84.656, loss_att=61.793, acc=0.683, loss=68.652, backward_time=1.043, grad_norm=104.486, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.527e-05, train_time=3.107
+[gpub005:0/64] 2023-07-08 01:23:42,605 (trainer:732) INFO: 24epoch:train:701-800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=66.495, loss_att=52.472, acc=0.706, loss=56.679, backward_time=1.032, grad_norm=118.761, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.525e-05, train_time=2.816
+[gpub005:0/64] 2023-07-08 01:24:45,692 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-08 01:25:03,063 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 01:25:06,403 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 01:25:06,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-08 01:25:06,447 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 01:31:05,985 (trainer:732) INFO: 24epoch:train:801-900batch: iter_time=2.913, forward_time=0.166, loss_ctc=70.482, loss_att=54.429, acc=0.689, loss=59.245, backward_time=1.040, grad_norm=93.942, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.184, optim0_lr0=7.523e-05, train_time=8.867
+[gpub005:0/64] 2023-07-08 01:33:23,595 (trainer:732) INFO: 24epoch:train:901-1000batch: iter_time=1.271e-04, forward_time=0.146, loss_ctc=74.153, loss_att=57.876, acc=0.710, loss=62.759, backward_time=1.032, grad_norm=94.097, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.522e-05, train_time=2.752
+[gpub005:0/64] 2023-07-08 01:35:39,445 (trainer:732) INFO: 24epoch:train:1001-1100batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=82.574, loss_att=61.700, acc=0.704, loss=67.962, backward_time=1.028, grad_norm=112.668, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.520e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 01:37:55,479 (trainer:732) INFO: 24epoch:train:1101-1200batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=72.525, loss_att=54.759, acc=0.693, loss=60.089, backward_time=1.029, grad_norm=96.496, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.518e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 01:40:11,488 (trainer:732) INFO: 24epoch:train:1201-1300batch: iter_time=1.179e-04, forward_time=0.146, loss_ctc=70.836, loss_att=59.985, acc=0.697, loss=63.240, backward_time=1.029, grad_norm=86.809, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.516e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 01:42:27,421 (trainer:732) INFO: 24epoch:train:1301-1400batch: iter_time=1.202e-04, forward_time=0.147, loss_ctc=77.993, loss_att=57.963, acc=0.699, loss=63.972, backward_time=1.028, grad_norm=95.498, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.515e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 01:44:43,520 (trainer:732) INFO: 24epoch:train:1401-1500batch: iter_time=1.235e-04, forward_time=0.148, loss_ctc=81.518, loss_att=62.066, acc=0.696, loss=67.902, backward_time=1.030, grad_norm=106.767, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.513e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 01:46:59,478 (trainer:732) INFO: 24epoch:train:1501-1600batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=66.753, loss_att=53.187, acc=0.700, loss=57.257, backward_time=1.028, grad_norm=93.450, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.184, optim0_lr0=7.511e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 01:48:35,879 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub005:0/64] 2023-07-08 01:48:54,179 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 01:48:57,625 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 01:48:57,626 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub005:0/64] 2023-07-08 01:48:57,632 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 01:52:29,456 (trainer:732) INFO: 24epoch:train:1601-1700batch: iter_time=1.330, forward_time=0.147, loss_ctc=65.681, loss_att=48.919, acc=0.702, loss=53.948, backward_time=1.042, grad_norm=84.444, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.510e-05, train_time=6.599
+[gpub005:0/64] 2023-07-08 01:54:45,379 (trainer:732) INFO: 24epoch:train:1701-1800batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=74.623, loss_att=57.753, acc=0.695, loss=62.814, backward_time=1.027, grad_norm=97.777, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.508e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 01:57:01,045 (trainer:732) INFO: 24epoch:train:1801-1900batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=76.935, loss_att=56.373, acc=0.708, loss=62.542, backward_time=1.026, grad_norm=120.929, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.506e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 01:59:16,723 (trainer:732) INFO: 24epoch:train:1901-2000batch: iter_time=1.132e-04, forward_time=0.147, loss_ctc=71.092, loss_att=51.859, acc=0.702, loss=57.629, backward_time=1.027, grad_norm=100.553, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.505e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 02:01:32,452 (trainer:732) INFO: 24epoch:train:2001-2100batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=72.789, loss_att=61.846, acc=0.686, loss=65.129, backward_time=1.026, grad_norm=98.884, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.503e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 02:03:48,170 (trainer:732) INFO: 24epoch:train:2101-2200batch: iter_time=1.204e-04, forward_time=0.145, loss_ctc=78.081, loss_att=55.115, acc=0.697, loss=62.005, backward_time=1.027, grad_norm=100.800, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.501e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 02:06:03,949 (trainer:732) INFO: 24epoch:train:2201-2300batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=80.049, loss_att=63.948, acc=0.678, loss=68.779, backward_time=1.026, grad_norm=114.705, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.499e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 02:08:19,737 (trainer:732) INFO: 24epoch:train:2301-2400batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=65.402, loss_att=53.461, acc=0.704, loss=57.043, backward_time=1.028, grad_norm=91.034, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.498e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:10:35,472 (trainer:732) INFO: 24epoch:train:2401-2500batch: iter_time=1.229e-04, forward_time=0.146, loss_ctc=72.613, loss_att=52.477, acc=0.698, loss=58.518, backward_time=1.026, grad_norm=99.117, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.496e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 02:10:37,997 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-08 02:10:56,202 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 02:10:59,652 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 02:10:59,652 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub005:0/64] 2023-07-08 02:10:59,658 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 02:16:07,690 (trainer:732) INFO: 24epoch:train:2501-2600batch: iter_time=1.217, forward_time=0.174, loss_ctc=63.381, loss_att=52.778, acc=0.701, loss=55.959, backward_time=1.043, grad_norm=89.376, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.494e-05, train_time=6.644
+[gpub005:0/64] 2023-07-08 02:18:23,643 (trainer:732) INFO: 24epoch:train:2601-2700batch: iter_time=1.350e-04, forward_time=0.146, loss_ctc=75.419, loss_att=55.105, acc=0.702, loss=61.199, backward_time=1.026, grad_norm=92.859, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.493e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 02:20:39,387 (trainer:732) INFO: 24epoch:train:2701-2800batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=80.075, loss_att=59.644, acc=0.711, loss=65.773, backward_time=1.028, grad_norm=92.749, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.491e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 02:22:55,022 (trainer:732) INFO: 24epoch:train:2801-2900batch: iter_time=1.190e-04, forward_time=0.145, loss_ctc=74.639, loss_att=60.582, acc=0.683, loss=64.799, backward_time=1.027, grad_norm=100.800, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.489e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 02:25:10,817 (trainer:732) INFO: 24epoch:train:2901-3000batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=72.616, loss_att=55.817, acc=0.697, loss=60.856, backward_time=1.027, grad_norm=106.716, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.488e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:27:26,850 (trainer:732) INFO: 24epoch:train:3001-3100batch: iter_time=1.242e-04, forward_time=0.147, loss_ctc=73.982, loss_att=56.264, acc=0.692, loss=61.579, backward_time=1.030, grad_norm=106.373, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.486e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 02:29:42,652 (trainer:732) INFO: 24epoch:train:3101-3200batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=79.191, loss_att=60.052, acc=0.687, loss=65.793, backward_time=1.029, grad_norm=123.443, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.484e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:31:58,456 (trainer:732) INFO: 24epoch:train:3201-3300batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=64.577, loss_att=50.815, acc=0.710, loss=54.944, backward_time=1.027, grad_norm=96.139, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.483e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:32:57,662 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-08 02:33:16,507 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 02:33:19,930 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 02:33:19,930 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-08 02:33:19,936 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 02:39:02,932 (trainer:732) INFO: 24epoch:train:3301-3400batch: iter_time=1.245, forward_time=0.157, loss_ctc=68.788, loss_att=57.316, acc=0.692, loss=60.757, backward_time=1.054, grad_norm=97.600, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.184, optim0_lr0=7.481e-05, train_time=8.489
+[gpub005:0/64] 2023-07-08 02:41:19,229 (trainer:732) INFO: 24epoch:train:3401-3500batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=72.583, loss_att=54.522, acc=0.715, loss=59.940, backward_time=1.029, grad_norm=105.703, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.479e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 02:43:35,703 (trainer:732) INFO: 24epoch:train:3501-3600batch: iter_time=1.213e-04, forward_time=0.146, loss_ctc=77.004, loss_att=55.591, acc=0.714, loss=62.015, backward_time=1.028, grad_norm=112.170, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.478e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 02:45:51,850 (trainer:732) INFO: 24epoch:train:3601-3700batch: iter_time=1.155e-04, forward_time=0.146, loss_ctc=71.437, loss_att=55.246, acc=0.700, loss=60.103, backward_time=1.029, grad_norm=102.536, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.476e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 02:48:08,047 (trainer:732) INFO: 24epoch:train:3701-3800batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=73.706, loss_att=63.883, acc=0.693, loss=66.830, backward_time=1.029, grad_norm=94.452, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.474e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 02:50:24,069 (trainer:732) INFO: 24epoch:train:3801-3900batch: iter_time=1.117e-04, forward_time=0.146, loss_ctc=75.479, loss_att=54.094, acc=0.707, loss=60.510, backward_time=1.029, grad_norm=97.153, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.473e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 02:52:39,889 (trainer:732) INFO: 24epoch:train:3901-4000batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=78.941, loss_att=58.000, acc=0.700, loss=64.282, backward_time=1.027, grad_norm=111.973, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.471e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:54:55,730 (trainer:732) INFO: 24epoch:train:4001-4100batch: iter_time=1.273e-04, forward_time=0.146, loss_ctc=66.834, loss_att=54.123, acc=0.701, loss=57.936, backward_time=1.027, grad_norm=96.300, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.469e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 02:56:27,946 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-08 02:56:45,945 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 02:56:49,390 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 02:56:49,390 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-08 02:56:49,396 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 03:01:09,171 (trainer:732) INFO: 24epoch:train:4101-4200batch: iter_time=1.248, forward_time=0.161, loss_ctc=71.173, loss_att=57.787, acc=0.718, loss=61.802, backward_time=1.042, grad_norm=102.778, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.184, optim0_lr0=7.468e-05, train_time=7.469
+[gpub005:0/64] 2023-07-08 03:03:25,586 (trainer:732) INFO: 24epoch:train:4201-4300batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=66.106, loss_att=52.017, acc=0.708, loss=56.244, backward_time=1.033, grad_norm=89.218, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.466e-05, train_time=2.728
+[gpub005:0/64] 2023-07-08 03:05:41,834 (trainer:732) INFO: 24epoch:train:4301-4400batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=71.008, loss_att=49.565, acc=0.721, loss=55.998, backward_time=1.030, grad_norm=101.755, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.464e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 03:07:57,995 (trainer:732) INFO: 24epoch:train:4401-4500batch: iter_time=1.142e-04, forward_time=0.147, loss_ctc=77.163, loss_att=57.856, acc=0.709, loss=63.648, backward_time=1.029, grad_norm=109.828, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.463e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 03:10:13,988 (trainer:732) INFO: 24epoch:train:4501-4600batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=76.375, loss_att=64.397, acc=0.699, loss=67.990, backward_time=1.029, grad_norm=107.384, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.461e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 03:12:30,013 (trainer:732) INFO: 24epoch:train:4601-4700batch: iter_time=1.345e-04, forward_time=0.148, loss_ctc=70.244, loss_att=51.300, acc=0.711, loss=56.983, backward_time=1.029, grad_norm=100.325, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.184, optim0_lr0=7.459e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 03:14:46,161 (trainer:732) INFO: 24epoch:train:4701-4800batch: iter_time=1.171e-04, forward_time=0.146, loss_ctc=77.063, loss_att=60.448, acc=0.691, loss=65.433, backward_time=1.031, grad_norm=99.011, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.458e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 03:17:07,246 (trainer:732) INFO: 24epoch:train:4801-4900batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=72.573, loss_att=53.899, acc=0.709, loss=59.501, backward_time=1.033, grad_norm=110.928, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.456e-05, train_time=2.821
+[gpub005:0/64] 2023-07-08 03:19:26,086 (trainer:732) INFO: 24epoch:train:4901-5000batch: iter_time=1.179e-04, forward_time=0.146, loss_ctc=71.710, loss_att=56.838, acc=0.706, loss=61.300, backward_time=1.032, grad_norm=102.143, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.454e-05, train_time=2.777
+[gpub005:0/64] 2023-07-08 03:19:27,571 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-08 03:19:45,853 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 03:19:49,250 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 03:19:49,250 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 03:19:49,256 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 03:25:36,249 (trainer:732) INFO: 24epoch:train:5001-5100batch: iter_time=1.261, forward_time=0.144, loss_ctc=65.391, loss_att=54.498, acc=0.700, loss=57.766, backward_time=1.048, grad_norm=94.745, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.453e-05, train_time=7.403
+[gpub005:0/64] 2023-07-08 03:27:52,012 (trainer:732) INFO: 24epoch:train:5101-5200batch: iter_time=1.034e-04, forward_time=0.145, loss_ctc=72.872, loss_att=53.944, acc=0.711, loss=59.622, backward_time=1.026, grad_norm=120.048, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.451e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 03:30:07,787 (trainer:732) INFO: 24epoch:train:5201-5300batch: iter_time=9.830e-05, forward_time=0.145, loss_ctc=78.638, loss_att=59.184, acc=0.712, loss=65.020, backward_time=1.027, grad_norm=92.094, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.449e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 03:32:25,756 (trainer:732) INFO: 24epoch:train:5301-5400batch: iter_time=9.984e-05, forward_time=0.145, loss_ctc=74.505, loss_att=60.142, acc=0.682, loss=64.451, backward_time=1.044, grad_norm=103.493, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.448e-05, train_time=2.759
+[gpub005:0/64] 2023-07-08 03:34:42,021 (trainer:732) INFO: 24epoch:train:5401-5500batch: iter_time=9.984e-05, forward_time=0.145, loss_ctc=71.932, loss_att=56.685, acc=0.695, loss=61.259, backward_time=1.031, grad_norm=90.686, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.446e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 03:36:58,579 (trainer:732) INFO: 24epoch:train:5501-5600batch: iter_time=1.076e-04, forward_time=0.146, loss_ctc=73.486, loss_att=54.717, acc=0.701, loss=60.348, backward_time=1.032, grad_norm=106.312, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.444e-05, train_time=2.731
+[gpub005:0/64] 2023-07-08 03:39:14,325 (trainer:732) INFO: 24epoch:train:5601-5700batch: iter_time=9.864e-05, forward_time=0.145, loss_ctc=77.877, loss_att=59.762, acc=0.687, loss=65.197, backward_time=1.026, grad_norm=109.617, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.443e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 03:41:30,723 (trainer:732) INFO: 24epoch:train:5701-5800batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=65.162, loss_att=50.988, acc=0.716, loss=55.240, backward_time=1.032, grad_norm=98.285, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.441e-05, train_time=2.728
+[gpub005:0/64] 2023-07-08 03:42:16,993 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-08 03:42:34,998 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 03:42:38,488 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 03:42:38,488 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-08 03:42:38,494 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 03:47:54,199 (trainer:732) INFO: 24epoch:train:5801-5900batch: iter_time=1.236, forward_time=0.154, loss_ctc=69.112, loss_att=56.695, acc=0.701, loss=60.420, backward_time=1.042, grad_norm=120.681, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.439e-05, train_time=7.669
+[gpub005:0/64] 2023-07-08 03:50:22,625 (trainer:732) INFO: 24epoch:train:5901-6000batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=71.865, loss_att=55.398, acc=0.711, loss=60.338, backward_time=1.043, grad_norm=110.200, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.438e-05, train_time=2.968
+[gpub005:0/64] 2023-07-08 03:52:49,847 (trainer:732) INFO: 24epoch:train:6001-6100batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=76.266, loss_att=55.052, acc=0.717, loss=61.416, backward_time=1.051, grad_norm=98.498, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.436e-05, train_time=2.944
+[gpub005:0/64] 2023-07-08 03:55:19,854 (trainer:732) INFO: 24epoch:train:6101-6200batch: iter_time=1.249e-04, forward_time=0.156, loss_ctc=70.751, loss_att=53.614, acc=0.707, loss=58.755, backward_time=1.049, grad_norm=103.884, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.435e-05, train_time=3.000
+[gpub005:0/64] 2023-07-08 03:57:49,296 (trainer:732) INFO: 24epoch:train:6201-6300batch: iter_time=1.229e-04, forward_time=0.148, loss_ctc=76.058, loss_att=64.659, acc=0.695, loss=68.078, backward_time=1.065, grad_norm=100.653, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.184, optim0_lr0=7.433e-05, train_time=2.989
+[gpub005:0/64] 2023-07-08 04:00:09,479 (trainer:732) INFO: 24epoch:train:6301-6400batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=74.958, loss_att=54.669, acc=0.705, loss=60.756, backward_time=1.031, grad_norm=94.045, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.431e-05, train_time=2.803
+[gpub005:0/64] 2023-07-08 04:02:32,143 (trainer:732) INFO: 24epoch:train:6401-6500batch: iter_time=1.264e-04, forward_time=0.145, loss_ctc=78.123, loss_att=58.724, acc=0.701, loss=64.544, backward_time=1.040, grad_norm=109.338, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.430e-05, train_time=2.853
+[gpub005:0/64] 2023-07-08 04:04:51,610 (trainer:732) INFO: 24epoch:train:6501-6600batch: iter_time=1.316e-04, forward_time=0.145, loss_ctc=66.708, loss_att=54.417, acc=0.704, loss=58.104, backward_time=1.032, grad_norm=102.988, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.428e-05, train_time=2.789
+[gpub005:0/64] 2023-07-08 04:06:26,443 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-08 04:06:44,558 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 04:06:47,998 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 04:06:47,998 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-08 04:06:48,004 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 04:09:57,980 (trainer:732) INFO: 24epoch:train:6601-6700batch: iter_time=1.411, forward_time=0.145, loss_ctc=70.865, loss_att=57.768, acc=0.718, loss=61.697, backward_time=1.040, grad_norm=96.931, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.426e-05, train_time=6.127
+[gpub005:0/64] 2023-07-08 04:12:15,222 (trainer:732) INFO: 24epoch:train:6701-6800batch: iter_time=1.273e-04, forward_time=0.145, loss_ctc=67.487, loss_att=52.036, acc=0.707, loss=56.671, backward_time=1.034, grad_norm=100.553, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.425e-05, train_time=2.745
+[gpub005:0/64] 2023-07-08 04:14:30,629 (trainer:732) INFO: 24epoch:train:6801-6900batch: iter_time=1.276e-04, forward_time=0.144, loss_ctc=71.361, loss_att=50.340, acc=0.711, loss=56.646, backward_time=1.025, grad_norm=111.542, clip=100.000, loss_scale=4.722e+21,
optim_step_time=0.183, optim0_lr0=7.423e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 04:16:46,581 (trainer:732) INFO: 24epoch:train:6901-7000batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=77.871, loss_att=57.029, acc=0.711, loss=63.282, backward_time=1.029, grad_norm=107.653, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.421e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 04:19:02,671 (trainer:732) INFO: 24epoch:train:7001-7100batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=76.050, loss_att=64.346, acc=0.697, loss=67.857, backward_time=1.029, grad_norm=91.851, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.420e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 04:21:18,512 (trainer:732) INFO: 24epoch:train:7101-7200batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=70.235, loss_att=51.323, acc=0.707, loss=56.997, backward_time=1.029, grad_norm=95.632, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.418e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:23:34,926 (trainer:732) INFO: 24epoch:train:7201-7300batch: iter_time=1.009e-04, forward_time=0.146, loss_ctc=74.110, loss_att=59.073, acc=0.684, loss=63.584, backward_time=1.033, grad_norm=116.090, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.417e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 04:25:50,893 (trainer:732) INFO: 24epoch:train:7301-7400batch: iter_time=1.053e-04, forward_time=0.145, loss_ctc=71.461, loss_att=54.905, acc=0.707, loss=59.872, backward_time=1.028, grad_norm=89.587, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.415e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 04:28:06,592 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-08 04:28:24,838 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 04:28:28,253 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 04:28:28,253 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-08 04:28:28,259 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 04:31:58,721 (trainer:732) INFO: 24epoch:train:7401-7500batch: iter_time=1.248, forward_time=0.157, loss_ctc=70.151, loss_att=54.443, acc=0.701, loss=59.156, backward_time=1.035, grad_norm=102.377, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.413e-05, train_time=7.356 +[gpub005:0/64] 2023-07-08 04:34:16,951 (trainer:732) INFO: 24epoch:train:7501-7600batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=64.218, loss_att=51.088, acc=0.702, loss=55.027, backward_time=1.035, grad_norm=94.349, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.412e-05, train_time=2.765 +[gpub005:0/64] 2023-07-08 04:36:33,188 (trainer:732) INFO: 24epoch:train:7601-7700batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=72.839, loss_att=52.242, acc=0.715, loss=58.421, backward_time=1.027, grad_norm=100.137, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.410e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 04:38:49,029 (trainer:732) INFO: 24epoch:train:7701-7800batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=77.897, loss_att=56.319, acc=0.710, loss=62.792, backward_time=1.029, grad_norm=96.600, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.408e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:41:04,866 (trainer:732) INFO: 24epoch:train:7801-7900batch: iter_time=1.253e-04, forward_time=0.145, loss_ctc=75.016, loss_att=64.152, acc=0.690, loss=67.411, backward_time=1.027, grad_norm=99.392, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.407e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:43:21,052 (trainer:732) INFO: 24epoch:train:7901-8000batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=69.861, loss_att=50.596, acc=0.706, loss=56.375, backward_time=1.031, grad_norm=85.637, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.405e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 04:45:36,903 (trainer:732) INFO: 24epoch:train:8001-8100batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=75.733, loss_att=59.098, acc=0.684, loss=64.089, backward_time=1.028, grad_norm=96.265, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.404e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:47:52,361 (trainer:732) INFO: 24epoch:train:8101-8200batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=71.279, loss_att=55.254, acc=0.703, loss=60.061, backward_time=1.025, grad_norm=104.118, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, 
optim0_lr0=7.402e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 04:50:08,165 (trainer:732) INFO: 24epoch:train:8201-8300batch: iter_time=1.132e-04, forward_time=0.145, loss_ctc=66.791, loss_att=54.537, acc=0.708, loss=58.213, backward_time=1.027, grad_norm=107.484, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.400e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 04:51:06,107 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-08 04:51:24,031 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 04:51:27,758 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 04:51:27,758 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-08 04:51:27,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 04:56:22,745 (trainer:732) INFO: 24epoch:train:8301-8400batch: iter_time=2.278, forward_time=0.194, loss_ctc=63.795, loss_att=46.168, acc=0.707, loss=51.456, backward_time=1.048, grad_norm=100.525, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.185, optim0_lr0=7.399e-05, train_time=7.491 +[gpub005:0/64] 2023-07-08 04:58:39,347 (trainer:732) INFO: 24epoch:train:8401-8500batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=70.673, loss_att=54.531, acc=0.718, loss=59.373, backward_time=1.027, grad_norm=101.049, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.397e-05, train_time=2.732 +[gpub005:0/64] 2023-07-08 05:00:56,190 (trainer:732) INFO: 24epoch:train:8501-8600batch: iter_time=1.362e-04, forward_time=0.146, loss_ctc=81.170, loss_att=59.636, acc=0.713, loss=66.096, backward_time=1.033, grad_norm=102.757, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.395e-05, train_time=2.737 +[gpub005:0/64] 2023-07-08 05:03:12,609 (trainer:732) INFO: 24epoch:train:8601-8700batch: iter_time=1.274e-04, forward_time=0.147, loss_ctc=70.543, loss_att=53.905, acc=0.698, loss=58.897, backward_time=1.033, grad_norm=98.360, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.394e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 05:05:28,730 (trainer:732) INFO: 24epoch:train:8701-8800batch: iter_time=1.292e-04, forward_time=0.147, loss_ctc=69.057, loss_att=58.090, acc=0.709, loss=61.380, backward_time=1.030, grad_norm=125.170, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.392e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 05:07:44,575 (trainer:732) INFO: 24epoch:train:8801-8900batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=77.481, loss_att=55.840, acc=0.710, loss=62.332, backward_time=1.029, grad_norm=96.606, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.391e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 05:10:00,543 (trainer:732) INFO: 24epoch:train:8901-9000batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=77.860, 
loss_att=60.412, acc=0.702, loss=65.647, backward_time=1.029, grad_norm=107.480, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.389e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 05:12:16,943 (trainer:732) INFO: 24epoch:train:9001-9100batch: iter_time=1.265e-04, forward_time=0.147, loss_ctc=64.228, loss_att=51.265, acc=0.713, loss=55.154, backward_time=1.032, grad_norm=103.336, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.387e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 05:13:50,730 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-08 05:14:09,041 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 05:14:12,445 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 05:14:12,446 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-08 05:14:12,452 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 05:18:16,694 (trainer:732) INFO: 24epoch:train:9101-9200batch: iter_time=1.278, forward_time=0.147, loss_ctc=69.239, loss_att=55.541, acc=0.700, loss=59.650, backward_time=1.040, grad_norm=102.258, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.386e-05, train_time=7.195 +[gpub005:0/64] 2023-07-08 05:20:33,672 (trainer:732) INFO: 24epoch:train:9201-9300batch: iter_time=1.200e-04, forward_time=0.147, loss_ctc=65.579, loss_att=50.029, acc=0.720, loss=54.694, backward_time=1.032, grad_norm=84.538, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.384e-05, train_time=2.739 +[gpub005:0/64] 2023-07-08 05:22:50,622 (trainer:732) INFO: 24epoch:train:9301-9400batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=70.083, loss_att=49.325, acc=0.718, loss=55.553, backward_time=1.030, grad_norm=91.235, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.382e-05, train_time=2.739 +[gpub005:0/64] 2023-07-08 05:25:06,734 (trainer:732) INFO: 24epoch:train:9401-9500batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=77.209, loss_att=57.645, acc=0.710, loss=63.514, backward_time=1.026, grad_norm=95.607, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.182, optim0_lr0=7.381e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 05:27:22,855 (trainer:732) INFO: 24epoch:train:9501-9600batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=75.606, loss_att=64.748, acc=0.703, loss=68.006, backward_time=1.030, grad_norm=94.541, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.379e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 05:29:38,842 (trainer:732) INFO: 24epoch:train:9601-9700batch: iter_time=1.074e-04, forward_time=0.147, loss_ctc=70.548, loss_att=50.666, acc=0.716, loss=56.631, backward_time=1.030, grad_norm=106.879, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.378e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 
05:31:55,248 (trainer:732) INFO: 24epoch:train:9701-9800batch: iter_time=1.109e-04, forward_time=0.146, loss_ctc=76.421, loss_att=59.658, acc=0.695, loss=64.687, backward_time=1.032, grad_norm=108.855, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.376e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 05:34:11,300 (trainer:732) INFO: 24epoch:train:9801-9900batch: iter_time=1.108e-04, forward_time=0.145, loss_ctc=69.905, loss_att=51.680, acc=0.718, loss=57.148, backward_time=1.032, grad_norm=97.751, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.374e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 05:36:27,320 (trainer:732) INFO: 24epoch:train:9901-10000batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=71.831, loss_att=57.034, acc=0.706, loss=61.473, backward_time=1.031, grad_norm=88.956, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.373e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 05:49:37,783 (trainer:338) INFO: 24epoch results: [train] iter_time=0.180, forward_time=0.148, loss_ctc=72.844, loss_att=56.139, acc=0.703, loss=61.150, backward_time=1.032, grad_norm=101.161, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.454e-05, train_time=3.286, time=4 hours, 34 minutes and 8.11 seconds, total_count=210000, gpu_max_cached_mem_GB=38.229, [valid] loss_ctc=49.172, cer_ctc=0.283, loss_att=39.899, acc=0.671, cer=0.378, wer=0.988, loss=42.681, time=6 minutes and 54.41 seconds, total_count=21758, gpu_max_cached_mem_GB=38.229, [att_plot] time=5 minutes and 56.93 seconds, total_count=0, gpu_max_cached_mem_GB=38.229 +[gpub005:0/64] 2023-07-08 05:49:53,063 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub005:0/64] 2023-07-08 05:49:53,184 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/17epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/19epoch.pth +[gpub005:0/64] 2023-07-08 05:49:53,184 (trainer:272) INFO: 25/30epoch started. Estimated time to finish: 1 day, 5 hours and 1 minute +[gpub005:0/64] 2023-07-08 05:49:53,188 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub005:0/64] 2023-07-08 05:50:11,340 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 05:50:15,036 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 05:50:15,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub005:0/64] 2023-07-08 05:50:15,043 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 05:54:20,277 (trainer:732) INFO: 25epoch:train:1-100batch: iter_time=1.258, forward_time=0.155, loss_ctc=67.394, loss_att=55.790, acc=0.669, loss=59.271, backward_time=1.041, grad_norm=94.745, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.371e-05, train_time=5.341 +[gpub005:0/64] 2023-07-08 05:56:37,742 (trainer:732) INFO: 25epoch:train:101-200batch: iter_time=1.387e-04, forward_time=0.146, loss_ctc=84.308, loss_att=59.589, acc=0.684, loss=67.005, backward_time=1.031, grad_norm=111.641, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.370e-05, train_time=2.749 +[gpub005:0/64] 2023-07-08 05:59:04,212 (trainer:732) INFO: 25epoch:train:201-300batch: iter_time=1.391e-04, forward_time=0.147, loss_ctc=78.994, loss_att=61.853, acc=0.673, loss=66.996, backward_time=1.039, grad_norm=104.903, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.368e-05, train_time=2.929 +[gpub005:0/64] 2023-07-08 06:01:23,041 (trainer:732) INFO: 25epoch:train:301-400batch: iter_time=1.347e-04, forward_time=0.151, loss_ctc=70.404, loss_att=51.376, acc=0.693, loss=57.084, backward_time=1.027, grad_norm=102.277, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.366e-05, train_time=2.776 +[gpub005:0/64] 2023-07-08 06:03:45,683 (trainer:732) INFO: 25epoch:train:401-500batch: iter_time=1.110e-04, forward_time=0.154, loss_ctc=71.110, loss_att=53.943, acc=0.687, loss=59.093, backward_time=1.036, grad_norm=110.916, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.365e-05, train_time=2.853 +[gpub005:0/64] 2023-07-08 06:06:01,868 (trainer:732) INFO: 25epoch:train:501-600batch: iter_time=1.154e-04, forward_time=0.145, loss_ctc=76.464, loss_att=60.641, acc=0.668, loss=65.388, backward_time=1.027, grad_norm=119.647, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.363e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 06:08:24,461 (trainer:732) INFO: 25epoch:train:601-700batch: iter_time=1.100e-04, forward_time=0.160, loss_ctc=73.685, loss_att=54.373, acc=0.707, loss=60.166, backward_time=1.038, grad_norm=117.803, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.362e-05, train_time=2.851 +[gpub005:0/64] 2023-07-08 06:10:47,886 (trainer:732) INFO: 25epoch:train:701-800batch: iter_time=1.234e-04, forward_time=0.153, loss_ctc=73.438, loss_att=57.055, acc=0.681, loss=61.970, backward_time=1.037, grad_norm=102.935, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.360e-05, 
train_time=2.869 +[gpub005:0/64] 2023-07-08 06:11:41,403 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub005:0/64] 2023-07-08 06:11:59,269 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 06:12:02,898 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 06:12:02,898 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-08 06:12:02,905 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 06:16:41,012 (trainer:732) INFO: 25epoch:train:801-900batch: iter_time=1.458, forward_time=0.191, loss_ctc=67.076, loss_att=57.674, acc=0.679, loss=60.495, backward_time=1.042, grad_norm=97.265, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.185, optim0_lr0=7.358e-05, train_time=7.062 +[gpub005:0/64] 2023-07-08 06:18:59,652 (trainer:732) INFO: 25epoch:train:901-1000batch: iter_time=1.225e-04, forward_time=0.147, loss_ctc=80.822, loss_att=56.630, acc=0.693, loss=63.887, backward_time=1.033, grad_norm=105.567, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.357e-05, train_time=2.773 +[gpub005:0/64] 2023-07-08 06:21:15,828 (trainer:732) INFO: 25epoch:train:1001-1100batch: iter_time=1.175e-04, forward_time=0.147, loss_ctc=80.627, loss_att=60.328, acc=0.686, loss=66.417, backward_time=1.029, grad_norm=110.847, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.355e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 06:23:34,704 (trainer:732) INFO: 25epoch:train:1101-1200batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=68.462, loss_att=49.783, acc=0.699, loss=55.387, backward_time=1.038, grad_norm=96.138, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.354e-05, train_time=2.777 +[gpub005:0/64] 2023-07-08 06:25:51,451 (trainer:732) INFO: 25epoch:train:1201-1300batch: iter_time=1.223e-04, forward_time=0.147, loss_ctc=68.844, loss_att=51.168, acc=0.687, loss=56.471, backward_time=1.030, grad_norm=91.064, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.352e-05, train_time=2.735 +[gpub005:0/64] 2023-07-08 06:28:09,593 (trainer:732) INFO: 25epoch:train:1301-1400batch: iter_time=1.119e-04, forward_time=0.147, loss_ctc=77.293, loss_att=63.126, acc=0.669, loss=67.376, backward_time=1.029, grad_norm=94.136, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.351e-05, train_time=2.763 +[gpub005:0/64] 2023-07-08 06:30:27,572 (trainer:732) INFO: 25epoch:train:1401-1500batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=72.320, loss_att=52.628, acc=0.702, loss=58.535, backward_time=1.031, grad_norm=107.375, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.349e-05, train_time=2.759 +[gpub005:0/64] 2023-07-08 06:32:43,510 (trainer:732) INFO: 25epoch:train:1501-1600batch: iter_time=1.230e-04, forward_time=0.146, loss_ctc=72.172, loss_att=55.048, acc=0.694, 
loss=60.185, backward_time=1.030, grad_norm=114.079, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.347e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 06:34:30,871 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-08 06:34:49,193 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 06:34:52,675 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 06:34:52,675 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-08 06:34:52,681 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 06:39:03,625 (trainer:732) INFO: 25epoch:train:1601-1700batch: iter_time=2.312, forward_time=0.145, loss_ctc=62.890, loss_att=49.929, acc=0.685, loss=53.817, backward_time=1.041, grad_norm=91.507, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.346e-05, train_time=7.602 +[gpub005:0/64] 2023-07-08 06:41:20,097 (trainer:732) INFO: 25epoch:train:1701-1800batch: iter_time=9.406e-05, forward_time=0.144, loss_ctc=73.172, loss_att=63.151, acc=0.679, loss=66.157, backward_time=1.032, grad_norm=103.748, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.344e-05, train_time=2.729 +[gpub005:0/64] 2023-07-08 06:43:36,333 (trainer:732) INFO: 25epoch:train:1801-1900batch: iter_time=9.608e-05, forward_time=0.144, loss_ctc=77.961, loss_att=56.059, acc=0.692, loss=62.629, backward_time=1.030, grad_norm=116.681, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.343e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 06:45:52,234 (trainer:732) INFO: 25epoch:train:1901-2000batch: iter_time=1.022e-04, forward_time=0.144, loss_ctc=76.589, loss_att=57.774, acc=0.696, loss=63.419, backward_time=1.028, grad_norm=111.423, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.341e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 06:48:07,914 (trainer:732) INFO: 25epoch:train:2001-2100batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=70.293, loss_att=53.244, acc=0.693, loss=58.359, backward_time=1.027, grad_norm=96.267, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.339e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 06:50:23,824 (trainer:732) INFO: 25epoch:train:2101-2200batch: iter_time=1.005e-04, forward_time=0.145, loss_ctc=67.023, loss_att=52.896, acc=0.693, loss=57.134, backward_time=1.028, grad_norm=92.993, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.338e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 06:52:40,143 (trainer:732) INFO: 25epoch:train:2201-2300batch: iter_time=1.062e-04, forward_time=0.145, loss_ctc=76.121, loss_att=61.259, acc=0.706, loss=65.718, backward_time=1.031, grad_norm=108.003, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.336e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 06:54:56,170 (trainer:732) 
INFO: 25epoch:train:2301-2400batch: iter_time=1.002e-04, forward_time=0.145, loss_ctc=76.319, loss_att=58.518, acc=0.696, loss=63.859, backward_time=1.030, grad_norm=138.170, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.335e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 06:57:11,886 (trainer:732) INFO: 25epoch:train:2401-2500batch: iter_time=1.036e-04, forward_time=0.145, loss_ctc=66.720, loss_att=49.422, acc=0.706, loss=54.612, backward_time=1.027, grad_norm=88.293, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.333e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 06:57:13,253 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-08 06:57:31,639 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 06:57:35,098 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 06:57:35,098 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-08 06:57:35,104 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 07:03:55,517 (trainer:732) INFO: 25epoch:train:2501-2600batch: iter_time=1.269, forward_time=0.168, loss_ctc=71.680, loss_att=61.817, acc=0.673, loss=64.776, backward_time=1.045, grad_norm=123.962, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.184, optim0_lr0=7.332e-05, train_time=8.072 +[gpub005:0/64] 2023-07-08 07:06:11,651 (trainer:732) INFO: 25epoch:train:2601-2700batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=78.835, loss_att=55.917, acc=0.698, loss=62.792, backward_time=1.028, grad_norm=111.638, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.330e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 07:08:27,988 (trainer:732) INFO: 25epoch:train:2701-2800batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=74.238, loss_att=55.257, acc=0.702, loss=60.951, backward_time=1.029, grad_norm=93.733, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.328e-05, train_time=2.727 +[gpub005:0/64] 2023-07-08 07:10:43,669 (trainer:732) INFO: 25epoch:train:2801-2900batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=69.537, loss_att=53.230, acc=0.695, loss=58.122, backward_time=1.026, grad_norm=92.779, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.184, optim0_lr0=7.327e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 07:12:59,646 (trainer:732) INFO: 25epoch:train:2901-3000batch: iter_time=1.050e-04, forward_time=0.145, loss_ctc=69.671, loss_att=54.659, acc=0.687, loss=59.162, backward_time=1.030, grad_norm=102.190, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.325e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 07:15:16,087 (trainer:732) INFO: 25epoch:train:3001-3100batch: iter_time=1.036e-04, forward_time=0.146, loss_ctc=73.047, loss_att=57.793, acc=0.706, loss=62.369, backward_time=1.032, grad_norm=97.007, clip=100.000, 
loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.324e-05, train_time=2.729 +[gpub005:0/64] 2023-07-08 07:17:32,402 (trainer:732) INFO: 25epoch:train:3101-3200batch: iter_time=9.844e-05, forward_time=0.145, loss_ctc=75.818, loss_att=59.026, acc=0.709, loss=64.063, backward_time=1.031, grad_norm=99.614, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.322e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 07:19:48,026 (trainer:732) INFO: 25epoch:train:3201-3300batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=65.520, loss_att=44.135, acc=0.710, loss=50.551, backward_time=1.027, grad_norm=101.011, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.321e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 07:20:36,040 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 07:20:54,437 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 07:20:57,907 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 07:20:57,907 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-08 07:20:57,913 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 07:27:17,085 (trainer:732) INFO: 25epoch:train:3301-3400batch: iter_time=1.283, forward_time=0.146, loss_ctc=63.050, loss_att=54.013, acc=0.684, loss=56.724, backward_time=1.043, grad_norm=89.866, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.319e-05, train_time=8.981 +[gpub005:0/64] 2023-07-08 07:29:33,399 (trainer:732) INFO: 25epoch:train:3401-3500batch: iter_time=1.127e-04, forward_time=0.146, loss_ctc=81.752, loss_att=62.301, acc=0.685, loss=68.136, backward_time=1.029, grad_norm=109.748, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.317e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 07:31:49,292 (trainer:732) INFO: 25epoch:train:3501-3600batch: iter_time=1.184e-04, forward_time=0.147, loss_ctc=74.281, loss_att=56.625, acc=0.693, loss=61.921, backward_time=1.027, grad_norm=105.121, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.316e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 07:34:05,347 (trainer:732) INFO: 25epoch:train:3601-3700batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=75.075, loss_att=55.767, acc=0.693, loss=61.559, backward_time=1.030, grad_norm=97.157, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.314e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 07:36:20,907 (trainer:732) INFO: 25epoch:train:3701-3800batch: iter_time=1.206e-04, forward_time=0.147, loss_ctc=69.361, loss_att=52.975, acc=0.686, loss=57.891, backward_time=1.026, grad_norm=97.077, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.313e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 07:38:36,925 (trainer:732) INFO: 25epoch:train:3801-3900batch: iter_time=1.209e-04, 
forward_time=0.147, loss_ctc=69.622, loss_att=55.526, acc=0.675, loss=59.755, backward_time=1.030, grad_norm=104.909, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.311e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 07:40:52,788 (trainer:732) INFO: 25epoch:train:3901-4000batch: iter_time=1.126e-04, forward_time=0.147, loss_ctc=77.126, loss_att=58.450, acc=0.698, loss=64.053, backward_time=1.028, grad_norm=127.297, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.310e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 07:43:08,833 (trainer:732) INFO: 25epoch:train:4001-4100batch: iter_time=1.138e-04, forward_time=0.147, loss_ctc=70.297, loss_att=52.546, acc=0.700, loss=57.871, backward_time=1.031, grad_norm=108.860, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.308e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 07:44:41,113 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-08 07:44:59,453 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 07:45:02,893 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 07:45:02,893 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-08 07:45:02,899 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 07:48:00,388 (trainer:732) INFO: 25epoch:train:4101-4200batch: iter_time=1.244, forward_time=0.152, loss_ctc=63.813, loss_att=47.498, acc=0.698, loss=52.392, backward_time=1.044, grad_norm=101.569, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.306e-05, train_time=5.831 +[gpub005:0/64] 2023-07-08 07:50:17,294 (trainer:732) INFO: 25epoch:train:4201-4300batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=74.377, loss_att=62.988, acc=0.675, loss=66.405, backward_time=1.031, grad_norm=106.091, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.305e-05, train_time=2.738 +[gpub005:0/64] 2023-07-08 07:52:33,320 (trainer:732) INFO: 25epoch:train:4301-4400batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=78.418, loss_att=54.676, acc=0.694, loss=61.798, backward_time=1.026, grad_norm=105.223, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.303e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 07:54:49,532 (trainer:732) INFO: 25epoch:train:4401-4500batch: iter_time=1.139e-04, forward_time=0.145, loss_ctc=75.077, loss_att=55.026, acc=0.702, loss=61.042, backward_time=1.033, grad_norm=101.895, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.302e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 07:57:04,904 (trainer:732) INFO: 25epoch:train:4501-4600batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=69.245, loss_att=52.258, acc=0.691, loss=57.354, backward_time=1.024, grad_norm=112.451, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.300e-05, 
train_time=2.707 +[gpub005:0/64] 2023-07-08 07:59:23,124 (trainer:732) INFO: 25epoch:train:4601-4700batch: iter_time=1.253e-04, forward_time=0.148, loss_ctc=67.163, loss_att=53.729, acc=0.683, loss=57.759, backward_time=1.031, grad_norm=104.056, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.299e-05, train_time=2.764 +[gpub005:0/64] 2023-07-08 08:01:40,001 (trainer:732) INFO: 25epoch:train:4701-4800batch: iter_time=1.179e-04, forward_time=0.147, loss_ctc=75.490, loss_att=58.879, acc=0.703, loss=63.862, backward_time=1.029, grad_norm=103.480, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.297e-05, train_time=2.737 +[gpub005:0/64] 2023-07-08 08:03:56,194 (trainer:732) INFO: 25epoch:train:4801-4900batch: iter_time=1.366e-04, forward_time=0.146, loss_ctc=74.857, loss_att=56.613, acc=0.698, loss=62.086, backward_time=1.033, grad_norm=105.020, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.296e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:06:11,952 (trainer:732) INFO: 25epoch:train:4901-5000batch: iter_time=1.173e-04, forward_time=0.146, loss_ctc=67.657, loss_att=51.018, acc=0.697, loss=56.010, backward_time=1.027, grad_norm=92.263, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.294e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 08:06:13,258 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-08 08:06:31,398 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 08:06:34,942 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 08:06:34,942 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub005:0/64] 2023-07-08 08:06:34,948 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 08:11:35,592 (trainer:732) INFO: 25epoch:train:5001-5100batch: iter_time=1.225, forward_time=0.145, loss_ctc=64.552, loss_att=53.371, acc=0.688, loss=56.726, backward_time=1.044, grad_norm=89.113, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.292e-05, train_time=6.473 +[gpub005:0/64] 2023-07-08 08:13:51,847 (trainer:732) INFO: 25epoch:train:5101-5200batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=79.661, loss_att=57.127, acc=0.702, loss=63.887, backward_time=1.031, grad_norm=107.156, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.291e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 08:16:08,081 (trainer:732) INFO: 25epoch:train:5201-5300batch: iter_time=1.105e-04, forward_time=0.147, loss_ctc=76.918, loss_att=60.190, acc=0.694, loss=65.209, backward_time=1.031, grad_norm=114.919, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.289e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:18:23,889 (trainer:732) INFO: 25epoch:train:5301-5400batch: iter_time=1.060e-04, forward_time=0.146, loss_ctc=68.186, loss_att=50.614, acc=0.703, 
loss=55.886, backward_time=1.027, grad_norm=98.722, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.288e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 08:20:39,880 (trainer:732) INFO: 25epoch:train:5401-5500batch: iter_time=1.119e-04, forward_time=0.148, loss_ctc=68.353, loss_att=52.983, acc=0.701, loss=57.594, backward_time=1.028, grad_norm=93.295, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.286e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 08:22:56,095 (trainer:732) INFO: 25epoch:train:5501-5600batch: iter_time=1.114e-04, forward_time=0.147, loss_ctc=72.638, loss_att=57.921, acc=0.694, loss=62.336, backward_time=1.032, grad_norm=118.868, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.285e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:25:12,299 (trainer:732) INFO: 25epoch:train:5601-5700batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=72.064, loss_att=54.239, acc=0.715, loss=59.587, backward_time=1.032, grad_norm=100.871, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.283e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:27:28,098 (trainer:732) INFO: 25epoch:train:5701-5800batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=70.937, loss_att=53.502, acc=0.705, loss=58.733, backward_time=1.029, grad_norm=105.846, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.282e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 08:28:14,331 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub005:0/64] 2023-07-08 08:28:32,548 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 08:28:36,003 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 08:28:36,003 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-08 08:28:36,010 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 08:32:06,642 (trainer:732) INFO: 25epoch:train:5801-5900batch: iter_time=1.229, forward_time=0.168, loss_ctc=61.899, loss_att=49.951, acc=0.692, loss=53.536, backward_time=1.043, grad_norm=114.240, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.184, optim0_lr0=7.280e-05, train_time=5.570 +[gpub005:0/64] 2023-07-08 08:34:23,716 (trainer:732) INFO: 25epoch:train:5901-6000batch: iter_time=1.036e-04, forward_time=0.146, loss_ctc=80.144, loss_att=59.966, acc=0.694, loss=66.019, backward_time=1.033, grad_norm=117.796, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.279e-05, train_time=2.741 +[gpub005:0/64] 2023-07-08 08:36:39,086 (trainer:732) INFO: 25epoch:train:6001-6100batch: iter_time=1.081e-04, forward_time=0.145, loss_ctc=74.687, loss_att=55.806, acc=0.699, loss=61.470, backward_time=1.025, grad_norm=102.244, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.277e-05, train_time=2.707 +[gpub005:0/64] 2023-07-08 08:38:54,592 (trainer:732) 
INFO: 25epoch:train:6101-6200batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=74.301, loss_att=54.846, acc=0.696, loss=60.682, backward_time=1.026, grad_norm=93.497, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.275e-05, train_time=2.710 +[gpub005:0/64] 2023-07-08 08:41:10,050 (trainer:732) INFO: 25epoch:train:6201-6300batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=69.302, loss_att=51.670, acc=0.693, loss=56.959, backward_time=1.027, grad_norm=100.115, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.274e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 08:43:26,047 (trainer:732) INFO: 25epoch:train:6301-6400batch: iter_time=1.119e-04, forward_time=0.145, loss_ctc=70.805, loss_att=56.455, acc=0.675, loss=60.760, backward_time=1.029, grad_norm=84.726, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.272e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 08:45:41,895 (trainer:732) INFO: 25epoch:train:6401-6500batch: iter_time=1.251e-04, forward_time=0.144, loss_ctc=75.784, loss_att=56.769, acc=0.708, loss=62.473, backward_time=1.028, grad_norm=113.466, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.271e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 08:47:57,577 (trainer:732) INFO: 25epoch:train:6501-6600batch: iter_time=1.128e-04, forward_time=0.145, loss_ctc=69.709, loss_att=52.710, acc=0.701, loss=57.809, backward_time=1.027, grad_norm=106.183, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.269e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 08:49:31,994 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-08 08:49:50,303 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 08:49:53,685 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 08:49:53,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-08 08:49:53,692 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 08:53:40,732 (trainer:732) INFO: 25epoch:train:6601-6700batch: iter_time=1.490, forward_time=0.185, loss_ctc=61.334, loss_att=48.253, acc=0.699, loss=52.177, backward_time=1.043, grad_norm=98.206, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.185, optim0_lr0=7.268e-05, train_time=6.863 +[gpub005:0/64] 2023-07-08 08:55:57,864 (trainer:732) INFO: 25epoch:train:6701-6800batch: iter_time=1.189e-04, forward_time=0.147, loss_ctc=76.795, loss_att=61.313, acc=0.677, loss=65.958, backward_time=1.034, grad_norm=106.691, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.266e-05, train_time=2.742 +[gpub005:0/64] 2023-07-08 08:58:14,559 (trainer:732) INFO: 25epoch:train:6801-6900batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=76.474, loss_att=54.969, acc=0.701, loss=61.420, backward_time=1.027, grad_norm=92.293, clip=100.000, 
loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.265e-05, train_time=2.734 +[gpub005:0/64] 2023-07-08 09:00:30,838 (trainer:732) INFO: 25epoch:train:6901-7000batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=74.051, loss_att=55.688, acc=0.700, loss=61.197, backward_time=1.029, grad_norm=124.427, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.263e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 09:02:46,205 (trainer:732) INFO: 25epoch:train:7001-7100batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=69.640, loss_att=51.459, acc=0.684, loss=56.913, backward_time=1.025, grad_norm=101.859, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.262e-05, train_time=2.707 +[gpub005:0/64] 2023-07-08 09:05:02,287 (trainer:732) INFO: 25epoch:train:7101-7200batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=67.988, loss_att=53.614, acc=0.682, loss=57.926, backward_time=1.030, grad_norm=98.586, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.260e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 09:07:18,114 (trainer:732) INFO: 25epoch:train:7201-7300batch: iter_time=1.140e-04, forward_time=0.146, loss_ctc=78.895, loss_att=59.924, acc=0.702, loss=65.615, backward_time=1.029, grad_norm=107.921, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.259e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 09:09:34,216 (trainer:732) INFO: 25epoch:train:7301-7400batch: iter_time=1.041e-04, forward_time=0.147, loss_ctc=68.386, loss_att=52.947, acc=0.699, loss=57.579, backward_time=1.029, grad_norm=100.878, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.257e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 09:11:57,640 (trainer:732) INFO: 25epoch:train:7401-7500batch: iter_time=1.042e-04, forward_time=0.147, loss_ctc=65.301, loss_att=50.575, acc=0.707, loss=54.993, backward_time=1.045, grad_norm=94.219, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.255e-05, train_time=2.868 +[gpub005:0/64] 2023-07-08 09:11:59,129 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-08 09:12:17,091 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 09:12:20,533 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 09:12:20,534 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-08 09:12:20,540 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 09:19:22,003 (trainer:732) INFO: 25epoch:train:7501-7600batch: iter_time=1.244, forward_time=0.146, loss_ctc=70.509, loss_att=61.157, acc=0.678, loss=63.963, backward_time=1.041, grad_norm=134.173, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.254e-05, train_time=8.887 +[gpub005:0/64] 2023-07-08 09:21:38,576 (trainer:732) INFO: 25epoch:train:7601-7700batch: iter_time=1.115e-04, forward_time=0.146, loss_ctc=78.059, loss_att=56.103, acc=0.702, loss=62.690, backward_time=1.028, grad_norm=103.019, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.252e-05, train_time=2.731 +[gpub005:0/64] 2023-07-08 09:23:54,909 (trainer:732) INFO: 25epoch:train:7701-7800batch: iter_time=1.239e-04, forward_time=0.148, loss_ctc=72.875, loss_att=55.188, acc=0.704, loss=60.494, backward_time=1.028, grad_norm=104.478, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.251e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 09:26:13,203 (trainer:732) INFO: 25epoch:train:7801-7900batch: iter_time=1.334e-04, forward_time=0.145, loss_ctc=69.878, loss_att=53.424, acc=0.699, loss=58.360, backward_time=1.025, grad_norm=132.395, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.249e-05, train_time=2.766 +[gpub005:0/64] 2023-07-08 09:28:29,065 (trainer:732) INFO: 25epoch:train:7901-8000batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=66.722, loss_att=53.254, acc=0.693, loss=57.294, backward_time=1.026, grad_norm=96.573, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.248e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 09:30:45,214 (trainer:732) INFO: 25epoch:train:8001-8100batch: iter_time=1.310e-04, forward_time=0.147, loss_ctc=72.346, loss_att=57.409, acc=0.709, loss=61.890, backward_time=1.028, grad_norm=100.161, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.246e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 09:33:01,171 (trainer:732) INFO: 25epoch:train:8101-8200batch: iter_time=1.067e-04, forward_time=0.147, loss_ctc=75.162, loss_att=59.259, acc=0.708, loss=64.030, backward_time=1.029, grad_norm=104.558, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.245e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 09:35:16,902 (trainer:732) INFO: 25epoch:train:8201-8300batch: iter_time=1.014e-04, forward_time=0.146, loss_ctc=63.470, loss_att=43.141, acc=0.713, loss=49.240, backward_time=1.028, grad_norm=88.867, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, 
optim0_lr0=7.243e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 09:36:06,322 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-08 09:36:24,330 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 09:36:28,070 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 09:36:28,070 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-08 09:36:28,077 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 09:41:52,004 (trainer:732) INFO: 25epoch:train:8301-8400batch: iter_time=1.423, forward_time=0.186, loss_ctc=61.528, loss_att=54.110, acc=0.687, loss=56.335, backward_time=1.049, grad_norm=94.433, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.184, optim0_lr0=7.242e-05, train_time=7.902
+[gpub005:0/64] 2023-07-08 09:44:08,921 (trainer:732) INFO: 25epoch:train:8401-8500batch: iter_time=1.226e-04, forward_time=0.147, loss_ctc=80.713, loss_att=60.130, acc=0.699, loss=66.305, backward_time=1.028, grad_norm=101.118, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.240e-05, train_time=2.738
+[gpub005:0/64] 2023-07-08 09:46:25,326 (trainer:732) INFO: 25epoch:train:8501-8600batch: iter_time=1.084e-04, forward_time=0.146, loss_ctc=72.864, loss_att=55.261, acc=0.709, loss=60.542, backward_time=1.029, grad_norm=110.946, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.239e-05, train_time=2.728
+[gpub005:0/64] 2023-07-08 09:48:41,215 (trainer:732) INFO: 25epoch:train:8601-8700batch: iter_time=1.034e-04, forward_time=0.145, loss_ctc=74.393, loss_att=55.528, acc=0.705, loss=61.188, backward_time=1.028, grad_norm=123.026, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.237e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 09:50:57,064 (trainer:732) INFO: 25epoch:train:8701-8800batch: iter_time=1.017e-04, forward_time=0.145, loss_ctc=68.057, loss_att=52.431, acc=0.694, loss=57.119, backward_time=1.028, grad_norm=108.479, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.236e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 09:53:13,155 (trainer:732) INFO: 25epoch:train:8801-8900batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=69.612, loss_att=54.933, acc=0.693, loss=59.336, backward_time=1.030, grad_norm=97.704, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.234e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 09:55:29,117 (trainer:732) INFO: 25epoch:train:8901-9000batch: iter_time=1.064e-04, forward_time=0.146, loss_ctc=74.272, loss_att=56.728, acc=0.717, loss=61.992, backward_time=1.029, grad_norm=139.457, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.233e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 09:57:44,878 (trainer:732) INFO: 25epoch:train:9001-9100batch: iter_time=1.241e-04, forward_time=0.148, loss_ctc=69.803, loss_att=53.448, acc=0.703, loss=58.354, backward_time=1.027, grad_norm=100.770, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.231e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 09:59:28,828 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-08 09:59:46,947 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 09:59:50,459 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 09:59:50,460 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-08 09:59:50,466 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 10:04:57,427 (trainer:732) INFO: 25epoch:train:9101-9200batch: iter_time=2.123, forward_time=0.149, loss_ctc=61.440, loss_att=46.047, acc=0.707, loss=50.665, backward_time=1.039, grad_norm=93.648, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.230e-05, train_time=8.651
+[gpub005:0/64] 2023-07-08 10:07:16,844 (trainer:732) INFO: 25epoch:train:9201-9300batch: iter_time=1.315e-04, forward_time=0.147, loss_ctc=70.907, loss_att=60.317, acc=0.693, loss=63.494, backward_time=1.032, grad_norm=107.964, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.228e-05, train_time=2.788
+[gpub005:0/64] 2023-07-08 10:10:27,961 (trainer:732) INFO: 25epoch:train:9301-9400batch: iter_time=1.220e-04, forward_time=0.148, loss_ctc=76.024, loss_att=54.562, acc=0.702, loss=61.001, backward_time=1.074, grad_norm=98.849, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.227e-05, train_time=3.822
+[gpub005:0/64] 2023-07-08 10:13:29,821 (trainer:732) INFO: 25epoch:train:9401-9500batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=74.449, loss_att=55.408, acc=0.705, loss=61.120, backward_time=1.077, grad_norm=113.210, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.225e-05, train_time=3.637
+[gpub005:0/64] 2023-07-08 10:16:27,233 (trainer:732) INFO: 25epoch:train:9501-9600batch: iter_time=1.013e-04, forward_time=0.144, loss_ctc=69.774, loss_att=53.169, acc=0.695, loss=58.150, backward_time=1.088, grad_norm=100.371, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.224e-05, train_time=3.548
+[gpub005:0/64] 2023-07-08 10:19:25,557 (trainer:732) INFO: 25epoch:train:9601-9700batch: iter_time=9.534e-05, forward_time=0.146, loss_ctc=67.052, loss_att=52.342, acc=0.696, loss=56.755, backward_time=1.091, grad_norm=115.351, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.222e-05, train_time=3.566
+[gpub005:0/64] 2023-07-08 10:22:24,138 (trainer:732) INFO: 25epoch:train:9701-9800batch: iter_time=9.886e-05, forward_time=0.146, loss_ctc=75.203, loss_att=60.399, acc=0.707, loss=64.840, backward_time=1.082, grad_norm=99.748, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.221e-05, train_time=3.571
+[gpub005:0/64] 2023-07-08 10:25:09,252 (trainer:732) INFO: 25epoch:train:9801-9900batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=73.722, loss_att=56.459, acc=0.708, loss=61.638, backward_time=1.067, grad_norm=108.586, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.219e-05, train_time=3.302
+[gpub005:0/64] 2023-07-08 10:27:55,647 (trainer:732) INFO: 25epoch:train:9901-10000batch: iter_time=9.428e-05, forward_time=0.145, loss_ctc=65.177, loss_att=47.351, acc=0.716, loss=52.698, backward_time=1.069, grad_norm=95.321, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.218e-05, train_time=3.328
+[gpub005:0/64] 2023-07-08 10:42:07,209 (trainer:338) INFO: 25epoch results: [train] iter_time=0.176, forward_time=0.148, loss_ctc=71.983, loss_att=55.204, acc=0.695, loss=60.238, backward_time=1.035, grad_norm=104.818, clip=100.000, loss_scale=2.456e+22, optim_step_time=0.183, optim0_lr0=7.294e-05, train_time=3.336, time=4 hours, 38 minutes and 25.25 seconds, total_count=220000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=47.009, cer_ctc=0.273, loss_att=42.530, acc=0.652, cer=0.442, wer=1.000, loss=43.874, time=7 minutes and 56.01 seconds, total_count=22770, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 52.76 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-08 10:42:26,240 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-08 10:42:26,246 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/20epoch.pth
+[gpub005:0/64] 2023-07-08 10:42:26,311 (trainer:272) INFO: 26/30epoch started. Estimated time to finish: 1 day, 15 minutes and 1.2 seconds
+[gpub005:0/64] 2023-07-08 10:42:27,702 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-08 10:42:46,699 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 10:42:50,222 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 10:42:50,222 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-08 10:42:50,310 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 10:53:23,692 (trainer:732) INFO: 26epoch:train:1-100batch: iter_time=5.145, forward_time=0.174, loss_ctc=69.387, loss_att=53.258, acc=0.713, loss=58.097, backward_time=1.043, grad_norm=87.743, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.186, optim0_lr0=7.216e-05, train_time=13.133
+[gpub005:0/64] 2023-07-08 10:55:39,564 (trainer:732) INFO: 26epoch:train:101-200batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=69.584, loss_att=53.691, acc=0.707, loss=58.459, backward_time=1.030, grad_norm=113.614, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.215e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 10:57:57,029 (trainer:732) INFO: 26epoch:train:201-300batch: iter_time=1.211e-04, forward_time=0.147, loss_ctc=76.842, loss_att=60.594, acc=0.691, loss=65.468, backward_time=1.031, grad_norm=97.403, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.213e-05, train_time=2.749
+[gpub005:0/64] 2023-07-08 11:00:13,880 (trainer:732) INFO: 26epoch:train:301-400batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=75.295, loss_att=55.762, acc=0.698, loss=61.622, backward_time=1.028, grad_norm=130.883, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.212e-05, train_time=2.737
+[gpub005:0/64] 2023-07-08 11:02:31,540 (trainer:732) INFO: 26epoch:train:401-500batch: iter_time=1.269e-04, forward_time=0.145, loss_ctc=71.385, loss_att=52.705, acc=0.691, loss=58.309, backward_time=1.030, grad_norm=118.277, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.210e-05, train_time=2.753
+[gpub005:0/64] 2023-07-08 11:04:48,000 (trainer:732) INFO: 26epoch:train:501-600batch: iter_time=1.212e-04, forward_time=0.146, loss_ctc=86.870, loss_att=64.836, acc=0.690, loss=71.446, backward_time=1.030, grad_norm=142.009, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.209e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 11:07:14,447 (trainer:732) INFO: 26epoch:train:601-700batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=75.534, loss_att=59.806, acc=0.685, loss=64.524, backward_time=1.037, grad_norm=135.306, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.207e-05, train_time=2.929
+[gpub005:0/64] 2023-07-08 11:09:37,366 (trainer:732) INFO: 26epoch:train:701-800batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=75.179, loss_att=56.787, acc=0.705, loss=62.304, backward_time=1.047, grad_norm=102.383, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.206e-05, train_time=2.858
+[gpub005:0/64] 2023-07-08 11:10:31,607 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-08 11:10:49,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 11:10:52,871 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 11:10:52,871 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub005:0/64] 2023-07-08 11:10:52,877 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 11:14:37,121 (trainer:732) INFO: 26epoch:train:801-900batch: iter_time=1.279, forward_time=0.180, loss_ctc=66.178, loss_att=53.712, acc=0.706, loss=57.452, backward_time=1.040, grad_norm=98.261, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.204e-05, train_time=5.995
+[gpub005:0/64] 2023-07-08 11:16:53,461 (trainer:732) INFO: 26epoch:train:901-1000batch: iter_time=1.033e-04, forward_time=0.146, loss_ctc=69.626, loss_att=51.851, acc=0.699, loss=57.184, backward_time=1.025, grad_norm=93.025, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.203e-05, train_time=2.727
+[gpub005:0/64] 2023-07-08 11:19:09,392 (trainer:732) INFO: 26epoch:train:1001-1100batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=72.606, loss_att=58.385, acc=0.692, loss=62.651, backward_time=1.028, grad_norm=94.394, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.201e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 11:21:25,206 (trainer:732) INFO: 26epoch:train:1101-1200batch: iter_time=1.058e-04, forward_time=0.145, loss_ctc=75.555, loss_att=58.013, acc=0.691, loss=63.275, backward_time=1.026, grad_norm=102.726, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.200e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 11:23:40,877 (trainer:732) INFO: 26epoch:train:1201-1300batch: iter_time=1.002e-04, forward_time=0.146, loss_ctc=74.959, loss_att=55.140, acc=0.697, loss=61.086, backward_time=1.028, grad_norm=94.634, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.198e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 11:25:56,605 (trainer:732) INFO: 26epoch:train:1301-1400batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=78.132, loss_att=58.606, acc=0.681, loss=64.464, backward_time=1.028, grad_norm=128.909, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.197e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 11:28:12,321 (trainer:732) INFO: 26epoch:train:1401-1500batch: iter_time=1.088e-04, forward_time=0.146, loss_ctc=80.030, loss_att=66.092, acc=0.666, loss=70.273, backward_time=1.027, grad_norm=120.822, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.195e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 11:30:27,679 (trainer:732) INFO: 26epoch:train:1501-1600batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=72.335, loss_att=51.339, acc=0.698, loss=57.638, backward_time=1.027, grad_norm=90.172, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.194e-05, train_time=2.707
+[gpub005:0/64] 2023-07-08 11:32:05,236 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub005:0/64] 2023-07-08 11:32:23,714 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 11:32:27,250 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 11:32:27,250 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub005:0/64] 2023-07-08 11:32:27,257 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 11:35:43,326 (trainer:732) INFO: 26epoch:train:1601-1700batch: iter_time=1.695, forward_time=0.145, loss_ctc=67.914, loss_att=52.867, acc=0.699, loss=57.381, backward_time=1.039, grad_norm=90.508, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.192e-05, train_time=6.313
+[gpub005:0/64] 2023-07-08 11:37:59,366 (trainer:732) INFO: 26epoch:train:1701-1800batch: iter_time=1.022e-04, forward_time=0.145, loss_ctc=69.223, loss_att=53.087, acc=0.697, loss=57.928, backward_time=1.029, grad_norm=89.506, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.191e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 11:40:15,245 (trainer:732) INFO: 26epoch:train:1801-1900batch: iter_time=9.824e-05, forward_time=0.145, loss_ctc=72.519, loss_att=57.553, acc=0.693, loss=62.043, backward_time=1.029, grad_norm=92.250, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.189e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 11:42:31,327 (trainer:732) INFO: 26epoch:train:1901-2000batch: iter_time=9.918e-05, forward_time=0.146, loss_ctc=76.626, loss_att=56.219, acc=0.694, loss=62.341, backward_time=1.028, grad_norm=104.830, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.188e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 11:44:46,889 (trainer:732) INFO: 26epoch:train:2001-2100batch: iter_time=1.021e-04, forward_time=0.145, loss_ctc=76.462, loss_att=56.079, acc=0.699, loss=62.194, backward_time=1.026, grad_norm=100.652, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.186e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 11:47:02,624 (trainer:732) INFO: 26epoch:train:2101-2200batch: iter_time=1.040e-04, forward_time=0.146, loss_ctc=74.787, loss_att=56.538, acc=0.684, loss=62.013, backward_time=1.027, grad_norm=120.468, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.185e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 11:49:18,264 (trainer:732) INFO: 26epoch:train:2201-2300batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=84.338, loss_att=68.512, acc=0.663, loss=73.260, backward_time=1.027, grad_norm=102.630, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.183e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 11:51:33,885 (trainer:732) INFO: 26epoch:train:2301-2400batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=70.380, loss_att=50.909, acc=0.699, loss=56.750, backward_time=1.027, grad_norm=96.571, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.182e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 11:53:49,838 (trainer:732) INFO: 26epoch:train:2401-2500batch: iter_time=1.182e-04, forward_time=0.148, loss_ctc=69.633, loss_att=54.823, acc=0.700, loss=59.266, backward_time=1.028, grad_norm=116.785, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.180e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 11:53:51,202 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-08 11:54:09,386 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 11:54:12,870 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 11:54:12,870 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-08 11:54:12,876 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 11:59:26,483 (trainer:732) INFO: 26epoch:train:2501-2600batch: iter_time=1.206, forward_time=0.146, loss_ctc=72.363, loss_att=51.578, acc=0.716, loss=57.813, backward_time=1.040, grad_norm=117.065, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.179e-05, train_time=6.733
+[gpub005:0/64] 2023-07-08 12:01:42,517 (trainer:732) INFO: 26epoch:train:2601-2700batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=68.902, loss_att=59.039, acc=0.707, loss=61.998, backward_time=1.028, grad_norm=96.352, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.177e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 12:03:58,444 (trainer:732) INFO: 26epoch:train:2701-2800batch: iter_time=1.304e-04, forward_time=0.147, loss_ctc=79.158, loss_att=56.178, acc=0.715, loss=63.072, backward_time=1.028, grad_norm=99.364, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.176e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 12:06:14,072 (trainer:732) INFO: 26epoch:train:2801-2900batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=69.645, loss_att=52.512, acc=0.705, loss=57.652, backward_time=1.027, grad_norm=91.072, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.174e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 12:08:29,717 (trainer:732) INFO: 26epoch:train:2901-3000batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=71.834, loss_att=56.794, acc=0.687, loss=61.306, backward_time=1.027, grad_norm=93.266, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.173e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 12:10:45,477 (trainer:732) INFO: 26epoch:train:3001-3100batch: iter_time=1.273e-04, forward_time=0.146, loss_ctc=81.950, loss_att=63.128, acc=0.688, loss=68.774, backward_time=1.029, grad_norm=123.293, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.171e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 12:13:01,110 (trainer:732) INFO: 26epoch:train:3101-3200batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=72.267, loss_att=57.245, acc=0.702, loss=61.751, backward_time=1.027, grad_norm=98.018, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.170e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 12:15:16,678 (trainer:732) INFO: 26epoch:train:3201-3300batch: iter_time=1.153e-04, forward_time=0.146, loss_ctc=69.021, loss_att=54.141, acc=0.701, loss=58.605, backward_time=1.027, grad_norm=97.100, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.168e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 12:16:04,551 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-08 12:16:23,248 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 12:16:26,719 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 12:16:26,719 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-08 12:16:26,725 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 12:20:37,309 (trainer:732) INFO: 26epoch:train:3301-3400batch: iter_time=1.265, forward_time=0.147, loss_ctc=73.247, loss_att=53.851, acc=0.718, loss=59.669, backward_time=1.041, grad_norm=101.473, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.167e-05, train_time=6.412
+[gpub005:0/64] 2023-07-08 12:22:53,523 (trainer:732) INFO: 26epoch:train:3401-3500batch: iter_time=1.317e-04, forward_time=0.146, loss_ctc=67.909, loss_att=50.412, acc=0.701, loss=55.661, backward_time=1.026, grad_norm=114.248, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.166e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 12:25:12,032 (trainer:732) INFO: 26epoch:train:3501-3600batch: iter_time=1.239e-04, forward_time=0.147, loss_ctc=72.018, loss_att=57.627, acc=0.696, loss=61.944, backward_time=1.031, grad_norm=94.403, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.164e-05, train_time=2.770
+[gpub005:0/64] 2023-07-08 12:27:27,429 (trainer:732) INFO: 26epoch:train:3601-3700batch: iter_time=1.369e-04, forward_time=0.145, loss_ctc=76.212, loss_att=57.567, acc=0.692, loss=63.161, backward_time=1.026, grad_norm=110.612, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.163e-05, train_time=2.708
+[gpub005:0/64] 2023-07-08 12:29:43,097 (trainer:732) INFO: 26epoch:train:3701-3800batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=73.717, loss_att=54.219, acc=0.698, loss=60.068, backward_time=1.027, grad_norm=105.595, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.161e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 12:32:01,979 (trainer:732) INFO: 26epoch:train:3801-3900batch: iter_time=1.318e-04, forward_time=0.148, loss_ctc=75.248, loss_att=55.956, acc=0.692, loss=61.744, backward_time=1.028, grad_norm=155.550, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.160e-05, train_time=2.777
+[gpub005:0/64] 2023-07-08 12:34:19,829 (trainer:732) INFO: 26epoch:train:3901-4000batch: iter_time=1.284e-04, forward_time=0.147, loss_ctc=81.079, loss_att=65.252, acc=0.669, loss=70.000, backward_time=1.027, grad_norm=104.122, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.158e-05, train_time=2.757
+[gpub005:0/64] 2023-07-08 12:36:41,064 (trainer:732) INFO: 26epoch:train:4001-4100batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=71.146, loss_att=50.789, acc=0.703, loss=56.897, backward_time=1.035, grad_norm=105.814, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.157e-05, train_time=2.824
+[gpub005:0/64] 2023-07-08 12:38:22,077 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-08 12:38:40,154 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 12:38:43,616 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 12:38:43,616 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-08 12:38:43,622 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 12:44:13,729 (trainer:732) INFO: 26epoch:train:4101-4200batch: iter_time=1.254, forward_time=0.146, loss_ctc=65.207, loss_att=53.418, acc=0.701, loss=56.954, backward_time=1.057, grad_norm=94.878, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.155e-05, train_time=9.053
+[gpub005:0/64] 2023-07-08 12:46:31,564 (trainer:732) INFO: 26epoch:train:4201-4300batch: iter_time=1.181e-04, forward_time=0.147, loss_ctc=69.637, loss_att=51.000, acc=0.720, loss=56.591, backward_time=1.028, grad_norm=91.646, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.154e-05, train_time=2.756
+[gpub005:0/64] 2023-07-08 12:48:47,826 (trainer:732) INFO: 26epoch:train:4301-4400batch: iter_time=1.210e-04, forward_time=0.147, loss_ctc=69.877, loss_att=58.670, acc=0.710, loss=62.032, backward_time=1.031, grad_norm=90.287, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.152e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 12:51:03,869 (trainer:732) INFO: 26epoch:train:4401-4500batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=78.707, loss_att=56.151, acc=0.712, loss=62.918, backward_time=1.028, grad_norm=100.064, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.151e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 12:53:19,561 (trainer:732) INFO: 26epoch:train:4501-4600batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=72.489, loss_att=53.880, acc=0.706, loss=59.463, backward_time=1.028, grad_norm=96.597, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.149e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 12:55:35,638 (trainer:732) INFO: 26epoch:train:4601-4700batch: iter_time=1.239e-04, forward_time=0.147, loss_ctc=69.209, loss_att=55.959, acc=0.692, loss=59.934, backward_time=1.030, grad_norm=104.066, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.148e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 12:57:51,477 (trainer:732) INFO: 26epoch:train:4701-4800batch: iter_time=1.274e-04, forward_time=0.147, loss_ctc=79.306, loss_att=59.281, acc=0.689, loss=65.288, backward_time=1.028, grad_norm=107.090, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.146e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 13:00:07,474 (trainer:732) INFO: 26epoch:train:4801-4900batch: iter_time=1.384e-04, forward_time=0.147, loss_ctc=73.038, loss_att=57.206, acc=0.700, loss=61.956, backward_time=1.030, grad_norm=99.761, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.145e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 13:02:23,155 (trainer:732) INFO: 26epoch:train:4901-5000batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=69.379, loss_att=53.802, acc=0.705, loss=58.475, backward_time=1.028, grad_norm=104.017, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.144e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 13:02:24,540 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-08 13:02:42,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 13:02:46,436 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 13:02:46,436 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-08 13:02:46,442 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 13:07:28,194 (trainer:732) INFO: 26epoch:train:5001-5100batch: iter_time=1.260, forward_time=0.147, loss_ctc=73.292, loss_att=50.325, acc=0.720, loss=57.215, backward_time=1.044, grad_norm=94.971, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.142e-05, train_time=6.101
+[gpub005:0/64] 2023-07-08 13:09:44,725 (trainer:732) INFO: 26epoch:train:5101-5200batch: iter_time=1.017e-04, forward_time=0.146, loss_ctc=69.927, loss_att=58.461, acc=0.708, loss=61.901, backward_time=1.030, grad_norm=103.765, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.141e-05, train_time=2.730
+[gpub005:0/64] 2023-07-08 13:12:00,343 (trainer:732) INFO: 26epoch:train:5201-5300batch: iter_time=9.994e-05, forward_time=0.145, loss_ctc=80.359, loss_att=56.264, acc=0.712, loss=63.492, backward_time=1.026, grad_norm=116.586, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.139e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 13:14:16,318 (trainer:732) INFO: 26epoch:train:5301-5400batch: iter_time=1.142e-04, forward_time=0.147, loss_ctc=69.708, loss_att=52.973, acc=0.705, loss=57.994, backward_time=1.028, grad_norm=105.492, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.138e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 13:16:31,923 (trainer:732) INFO: 26epoch:train:5401-5500batch: iter_time=1.216e-04, forward_time=0.148, loss_ctc=70.245, loss_att=55.219, acc=0.686, loss=59.727, backward_time=1.026, grad_norm=110.833, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.136e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 13:18:47,558 (trainer:732) INFO: 26epoch:train:5501-5600batch: iter_time=1.173e-04, forward_time=0.148, loss_ctc=80.307, loss_att=60.895, acc=0.691, loss=66.719, backward_time=1.026, grad_norm=99.104, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.135e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 13:21:03,349 (trainer:732) INFO: 26epoch:train:5601-5700batch: iter_time=1.161e-04, forward_time=0.148, loss_ctc=70.927, loss_att=55.692, acc=0.706, loss=60.262, backward_time=1.027, grad_norm=104.300, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.133e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 13:23:19,161 (trainer:732) INFO: 26epoch:train:5701-5800batch: iter_time=1.018e-04, forward_time=0.146, loss_ctc=67.813, loss_att=52.632, acc=0.705, loss=57.186, backward_time=1.026, grad_norm=101.610, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.132e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 13:24:06,674 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-08 13:24:24,853 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 13:24:28,314 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 13:24:28,315 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-08 13:24:28,321 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 13:29:09,299 (trainer:732) INFO: 26epoch:train:5801-5900batch: iter_time=1.271, forward_time=0.172, loss_ctc=73.165, loss_att=53.526, acc=0.722, loss=59.418, backward_time=1.037, grad_norm=95.823, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.185, optim0_lr0=7.130e-05, train_time=7.002
+[gpub005:0/64] 2023-07-08 13:31:28,366 (trainer:732) INFO: 26epoch:train:5901-6000batch: iter_time=1.015e-04, forward_time=0.147, loss_ctc=67.679, loss_att=48.515, acc=0.726, loss=54.264, backward_time=1.030, grad_norm=99.757, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.129e-05, train_time=2.782
+[gpub005:0/64] 2023-07-08 13:33:57,056 (trainer:732) INFO: 26epoch:train:6001-6100batch: iter_time=1.045e-04, forward_time=0.146, loss_ctc=72.320, loss_att=57.931, acc=0.702, loss=62.248, backward_time=1.035, grad_norm=96.806, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.128e-05, train_time=2.974
+[gpub005:0/64] 2023-07-08 13:36:18,221 (trainer:732) INFO: 26epoch:train:6101-6200batch: iter_time=1.158e-04, forward_time=0.146, loss_ctc=74.820, loss_att=56.437, acc=0.706, loss=61.952, backward_time=1.041, grad_norm=104.704, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.126e-05, train_time=2.823
+[gpub005:0/64] 2023-07-08 13:38:37,328 (trainer:732) INFO: 26epoch:train:6201-6300batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=74.027, loss_att=54.430, acc=0.705, loss=60.309, backward_time=1.037, grad_norm=95.467, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.125e-05, train_time=2.782
+[gpub005:0/64] 2023-07-08 13:40:53,195 (trainer:732) INFO: 26epoch:train:6301-6400batch: iter_time=1.223e-04, forward_time=0.147, loss_ctc=74.703, loss_att=56.062, acc=0.699, loss=61.654, backward_time=1.027, grad_norm=102.603, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.123e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 13:43:09,362 (trainer:732) INFO: 26epoch:train:6401-6500batch: iter_time=1.088e-04, forward_time=0.147, loss_ctc=78.713, loss_att=64.969, acc=0.686, loss=69.093, backward_time=1.028, grad_norm=104.530, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.122e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 13:45:28,847 (trainer:732) INFO: 26epoch:train:6501-6600batch: iter_time=1.245e-04, forward_time=0.147, loss_ctc=69.965, loss_att=51.127, acc=0.717, loss=56.778, backward_time=1.043, grad_norm=82.428, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.120e-05, train_time=2.789
+[gpub005:0/64] 2023-07-08 13:47:05,544 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-08 13:47:23,796 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 13:47:27,253 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 13:47:27,253 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-08 13:47:27,260 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 13:50:28,307 (trainer:732) INFO: 26epoch:train:6601-6700batch: iter_time=1.259, forward_time=0.147, loss_ctc=64.311, loss_att=52.844, acc=0.702, loss=56.284, backward_time=1.044, grad_norm=99.413, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.119e-05, train_time=5.989
+[gpub005:0/64] 2023-07-08 13:52:44,647 (trainer:732) INFO: 26epoch:train:6701-6800batch: iter_time=1.222e-04, forward_time=0.146, loss_ctc=69.999, loss_att=51.589, acc=0.707, loss=57.112, backward_time=1.028, grad_norm=108.206, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.117e-05, train_time=2.727
+[gpub005:0/64] 2023-07-08 13:55:00,535 (trainer:732) INFO: 26epoch:train:6801-6900batch: iter_time=1.248e-04, forward_time=0.147, loss_ctc=68.683, loss_att=58.666, acc=0.702, loss=61.671, backward_time=1.027, grad_norm=95.657, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.116e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 13:57:16,106 (trainer:732) INFO: 26epoch:train:6901-7000batch: iter_time=1.367e-04, forward_time=0.146, loss_ctc=79.555, loss_att=57.273, acc=0.699, loss=63.958, backward_time=1.027, grad_norm=106.510, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.115e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 13:59:31,764 (trainer:732) INFO: 26epoch:train:7001-7100batch: iter_time=1.325e-04, forward_time=0.147, loss_ctc=71.814, loss_att=53.324, acc=0.706, loss=58.871, backward_time=1.027, grad_norm=103.081, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.113e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 14:01:47,399 (trainer:732) INFO: 26epoch:train:7101-7200batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=69.657, loss_att=56.274, acc=0.687, loss=60.289, backward_time=1.027, grad_norm=96.955, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.112e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 14:04:03,309 (trainer:732) INFO: 26epoch:train:7201-7300batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=77.898, loss_att=59.498, acc=0.680, loss=65.018, backward_time=1.029, grad_norm=113.679, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.110e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 14:06:19,024 (trainer:732) INFO: 26epoch:train:7301-7400batch: iter_time=1.125e-04, forward_time=0.146, loss_ctc=72.524, loss_att=56.616, acc=0.693, loss=61.388, backward_time=1.028, grad_norm=93.674, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.109e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 14:08:34,660 (trainer:732) INFO: 26epoch:train:7401-7500batch: iter_time=1.323e-04, forward_time=0.146, loss_ctc=69.537, loss_att=53.600, acc=0.701, loss=58.382, backward_time=1.027, grad_norm=106.828, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.107e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 14:08:35,961 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-08 14:08:54,239 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 14:08:57,934 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 14:08:57,934 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-08 14:08:57,940 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 14:15:22,244 (trainer:732) INFO: 26epoch:train:7501-7600batch: iter_time=1.758, forward_time=0.148, loss_ctc=68.459, loss_att=51.191, acc=0.727, loss=56.372, backward_time=1.038, grad_norm=87.229, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.106e-05, train_time=8.151
+[gpub005:0/64] 2023-07-08 14:17:40,957 (trainer:732) INFO: 26epoch:train:7601-7700batch: iter_time=1.404e-04, forward_time=0.147, loss_ctc=68.155, loss_att=52.137, acc=0.719, loss=56.943, backward_time=1.029, grad_norm=151.653, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.104e-05, train_time=2.774
+[gpub005:0/64] 2023-07-08 14:19:57,108 (trainer:732) INFO: 26epoch:train:7701-7800batch: iter_time=1.239e-04, forward_time=0.147, loss_ctc=75.907, loss_att=59.882, acc=0.701, loss=64.690, backward_time=1.028, grad_norm=99.037, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.103e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 14:22:12,912 (trainer:732) INFO: 26epoch:train:7801-7900batch: iter_time=1.356e-04, forward_time=0.147, loss_ctc=70.855, loss_att=54.076, acc=0.710, loss=59.110, backward_time=1.028, grad_norm=103.081, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.102e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 14:24:30,607 (trainer:732) INFO: 26epoch:train:7901-8000batch: iter_time=1.361e-04, forward_time=0.148, loss_ctc=68.206, loss_att=51.104, acc=0.702, loss=56.235, backward_time=1.031, grad_norm=99.810, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.100e-05, train_time=2.754
+[gpub005:0/64] 2023-07-08 14:26:47,209 (trainer:732) INFO: 26epoch:train:8001-8100batch: iter_time=1.410e-04, forward_time=0.147, loss_ctc=82.712, loss_att=63.271, acc=0.695, loss=69.104, backward_time=1.026, grad_norm=111.254, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.099e-05, train_time=2.732
+[gpub005:0/64] 2023-07-08 14:29:11,879 (trainer:732) INFO: 26epoch:train:8101-8200batch: iter_time=1.378e-04, forward_time=0.147, loss_ctc=73.581, loss_att=58.566, acc=0.694, loss=63.071, backward_time=1.037, grad_norm=97.879, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.097e-05, train_time=2.893
+[gpub005:0/64] 2023-07-08 14:31:27,869 (trainer:732) INFO: 26epoch:train:8201-8300batch: iter_time=1.301e-04, forward_time=0.147, loss_ctc=73.403, loss_att=55.466, acc=0.709, loss=60.847, backward_time=1.028, grad_norm=102.034, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.096e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 14:32:17,789 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-08 14:32:35,784 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 14:32:39,537 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 14:32:39,537 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-08 14:32:39,543 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 14:38:06,476 (trainer:732) INFO: 26epoch:train:8301-8400batch: iter_time=1.307, forward_time=0.146, loss_ctc=68.148, loss_att=52.608, acc=0.711, loss=57.270, backward_time=1.039, grad_norm=125.284, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.094e-05, train_time=7.972
+[gpub005:0/64] 2023-07-08 14:40:23,154 (trainer:732) INFO: 26epoch:train:8401-8500batch: iter_time=1.212e-04, forward_time=0.147, loss_ctc=68.148, loss_att=52.855, acc=0.701, loss=57.443, backward_time=1.027, grad_norm=93.761, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.093e-05, train_time=2.733
+[gpub005:0/64] 2023-07-08 14:42:39,471 (trainer:732) INFO: 26epoch:train:8501-8600batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=75.352, loss_att=59.373, acc=0.694, loss=64.167, backward_time=1.026, grad_norm=104.938, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.092e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 14:44:55,183 (trainer:732) INFO: 26epoch:train:8601-8700batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=71.071, loss_att=53.446, acc=0.700, loss=58.734, backward_time=1.026, grad_norm=98.544, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.090e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 14:47:10,603 (trainer:732) INFO: 26epoch:train:8701-8800batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=73.000, loss_att=54.481, acc=0.691, loss=60.037, backward_time=1.026, grad_norm=114.647, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.089e-05, train_time=2.708
+[gpub005:0/64] 2023-07-08 14:49:26,242 (trainer:732) INFO: 26epoch:train:8801-8900batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=76.952, loss_att=59.376, acc=0.693, loss=64.649, backward_time=1.028, grad_norm=98.987, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.087e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 14:51:41,715 (trainer:732) INFO: 26epoch:train:8901-9000batch: iter_time=1.081e-04, forward_time=0.146, loss_ctc=78.083, loss_att=61.980, acc=0.678, loss=66.811, backward_time=1.026, grad_norm=108.739, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.086e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 14:53:57,229 (trainer:732) INFO: 26epoch:train:9001-9100batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=70.526, loss_att=52.858, acc=0.705, loss=58.158, backward_time=1.026, grad_norm=99.831, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.084e-05, train_time=2.710
+[gpub005:0/64] 2023-07-08 14:55:29,936 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-08 14:55:48,203 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 14:55:51,676 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 14:55:51,676 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 14:55:51,682 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 14:59:11,906 (trainer:732) INFO: 26epoch:train:9101-9200batch: iter_time=1.673, forward_time=0.148, loss_ctc=69.871, loss_att=52.356, acc=0.707, loss=57.611, backward_time=1.042, grad_norm=126.560, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.083e-05, train_time=6.293
+[gpub005:0/64] 2023-07-08 15:01:28,565 (trainer:732) INFO: 26epoch:train:9201-9300batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=71.189, loss_att=51.918, acc=0.707, loss=57.699, backward_time=1.029, grad_norm=123.852, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.082e-05, train_time=2.733
+[gpub005:0/64] 2023-07-08 15:03:47,418 (trainer:732) INFO: 26epoch:train:9301-9400batch: iter_time=1.201e-04, forward_time=0.148, loss_ctc=67.872, loss_att=55.829, acc=0.707, loss=59.442, backward_time=1.033, grad_norm=91.349, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.080e-05, train_time=2.777
+[gpub005:0/64] 2023-07-08 15:06:06,242 (trainer:732) INFO: 26epoch:train:9401-9500batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=78.903, loss_att=56.094, acc=0.705, loss=62.937, backward_time=1.030, grad_norm=96.669, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.079e-05, train_time=2.776
+[gpub005:0/64] 2023-07-08 15:08:22,234 (trainer:732) INFO: 26epoch:train:9501-9600batch: iter_time=1.098e-04, forward_time=0.147, loss_ctc=71.778, loss_att=52.472, acc=0.708, loss=58.264, backward_time=1.028, grad_norm=100.964, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.077e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 15:10:42,121 (trainer:732) INFO: 26epoch:train:9601-9700batch: iter_time=1.164e-04, forward_time=0.147, loss_ctc=69.666, loss_att=56.280, acc=0.686, loss=60.296, backward_time=1.041, grad_norm=97.150, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.076e-05, train_time=2.798
+[gpub005:0/64] 2023-07-08 15:13:00,926 (trainer:732) INFO: 26epoch:train:9701-9800batch: iter_time=1.106e-04, forward_time=0.147, loss_ctc=77.484, loss_att=59.398, acc=0.680, loss=64.824, backward_time=1.034, grad_norm=104.554, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.075e-05, train_time=2.776
+[gpub005:0/64] 2023-07-08 15:15:25,022 (trainer:732) INFO: 26epoch:train:9801-9900batch: iter_time=1.269e-04, forward_time=0.157, loss_ctc=69.791, loss_att=54.546, acc=0.702, loss=59.120, backward_time=1.041, grad_norm=91.940, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.186, optim0_lr0=7.073e-05, train_time=2.882
+[gpub005:0/64] 2023-07-08 15:17:40,647 (trainer:732) INFO: 26epoch:train:9901-10000batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=67.648, loss_att=52.440, acc=0.704, loss=57.002, backward_time=1.025, grad_norm=92.639, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.072e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 15:29:42,436 (trainer:338) INFO: 26epoch results: [train] iter_time=0.204, forward_time=0.147, loss_ctc=72.927, loss_att=55.820, acc=0.700, loss=60.952, backward_time=1.031, grad_norm=104.257, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.143e-05, train_time=3.302, time=4 hours, 35 minutes and 23.69 seconds, total_count=230000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=48.054, cer_ctc=0.276, loss_att=40.224, acc=0.655, cer=0.436, wer=0.999, loss=42.573, time=5 minutes and 54.42 seconds, total_count=23782, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 57.87 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-08 15:29:57,930 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-08 15:29:57,939 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/21epoch.pth
+[gpub005:0/64] 2023-07-08 15:29:57,980 (trainer:272) INFO: 27/30epoch started. Estimated time to finish: 19 hours, 20 minutes and 32.41 seconds
+[gpub005:0/64] 2023-07-08 15:29:58,046 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-08 15:30:15,652 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 15:30:18,988 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 15:30:18,988 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-08 15:30:18,994 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 15:35:04,256 (trainer:732) INFO: 27epoch:train:1-100batch: iter_time=1.647, forward_time=0.168, loss_ctc=78.316, loss_att=58.961, acc=0.688, loss=64.768, backward_time=1.042, grad_norm=106.995, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.185, optim0_lr0=7.070e-05, train_time=6.125
+[gpub005:0/64] 2023-07-08 15:37:21,077 (trainer:732) INFO: 27epoch:train:101-200batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=65.781, loss_att=51.624, acc=0.711, loss=55.871, backward_time=1.029, grad_norm=94.992, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.069e-05, train_time=2.736
+[gpub005:0/64] 2023-07-08 15:39:39,144 (trainer:732) INFO: 27epoch:train:201-300batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=68.849, loss_att=52.320, acc=0.684, loss=57.279, backward_time=1.027, grad_norm=92.602, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.067e-05, train_time=2.761
+[gpub005:0/64] 2023-07-08 15:41:55,639 (trainer:732) INFO: 27epoch:train:301-400batch: iter_time=1.117e-04, forward_time=0.147, loss_ctc=89.419, loss_att=68.861, acc=0.699, loss=75.029, backward_time=1.031, grad_norm=131.850, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.066e-05, train_time=2.730
+[gpub005:0/64] 2023-07-08 15:44:12,562 (trainer:732) INFO: 27epoch:train:401-500batch: iter_time=1.123e-04, forward_time=0.146, loss_ctc=71.286, loss_att=59.711, acc=0.692, loss=63.183, backward_time=1.032, grad_norm=96.921, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.065e-05, train_time=2.738
+[gpub005:0/64] 2023-07-08 15:46:35,966 (trainer:732) INFO: 27epoch:train:501-600batch: iter_time=1.069e-04, forward_time=0.144, loss_ctc=63.026, loss_att=47.249, acc=0.702, loss=51.982, backward_time=1.050, grad_norm=101.613, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.063e-05, train_time=2.868
+[gpub005:0/64] 2023-07-08 15:48:57,030 (trainer:732) INFO: 27epoch:train:601-700batch: iter_time=1.063e-04, forward_time=0.146, loss_ctc=78.906, loss_att=55.763, acc=0.690, loss=62.706, backward_time=1.033, grad_norm=105.517, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.062e-05, train_time=2.821
+[gpub005:0/64] 2023-07-08 15:51:15,464 (trainer:732) INFO: 27epoch:train:701-800batch: iter_time=1.760e-04, forward_time=0.160, loss_ctc=71.619, loss_att=57.821, acc=0.699, loss=61.960, backward_time=1.029, grad_norm=99.845, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.060e-05, train_time=2.768
+[gpub005:0/64] 2023-07-08 15:52:10,389 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-08 15:52:28,184 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 15:52:31,555 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 15:52:31,555 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 15:52:31,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 15:56:21,957 (trainer:732) INFO: 27epoch:train:801-900batch: iter_time=1.505, forward_time=0.162, loss_ctc=78.800, loss_att=57.410, acc=0.692, loss=63.827, backward_time=1.046, grad_norm=114.718, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.059e-05, train_time=6.130
+[gpub005:0/64] 2023-07-08 15:58:38,204 (trainer:732) INFO: 27epoch:train:901-1000batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=70.277, loss_att=51.641, acc=0.699, loss=57.232, backward_time=1.026, grad_norm=108.240, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.058e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 16:00:54,182 (trainer:732) INFO: 27epoch:train:1001-1100batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=65.909, loss_att=52.213, acc=0.692, loss=56.322, backward_time=1.028, grad_norm=104.226, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.056e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 16:03:13,716 (trainer:732) INFO: 27epoch:train:1101-1200batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=75.613, loss_att=59.388, acc=0.694, loss=64.256, backward_time=1.033, grad_norm=118.002, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.055e-05, train_time=2.790
+[gpub005:0/64] 2023-07-08 16:05:29,735 (trainer:732) INFO: 27epoch:train:1201-1300batch: iter_time=1.145e-04, forward_time=0.147, loss_ctc=81.972, loss_att=64.720, acc=0.683, loss=69.896, backward_time=1.029, grad_norm=113.873, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.053e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 16:07:45,823 (trainer:732) INFO: 27epoch:train:1301-1400batch: iter_time=1.129e-04, forward_time=0.147, loss_ctc=66.096, loss_att=57.013, acc=0.672, loss=59.738, backward_time=1.028, grad_norm=94.201, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.052e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 16:10:01,331 (trainer:732) INFO: 27epoch:train:1401-1500batch: iter_time=1.266e-04, forward_time=0.146, loss_ctc=77.033, loss_att=53.359, acc=0.689, loss=60.461, backward_time=1.026, grad_norm=105.645, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.051e-05, train_time=2.710
+[gpub005:0/64] 2023-07-08 16:12:16,550 (trainer:732) INFO: 27epoch:train:1501-1600batch: iter_time=1.348e-04, forward_time=0.145, loss_ctc=64.493, loss_att=49.540, acc=0.689, loss=54.026, backward_time=1.024, grad_norm=104.991, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.049e-05, train_time=2.704
+[gpub005:0/64] 2023-07-08 16:13:52,617 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub005:0/64] 2023-07-08 16:14:10,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 16:14:14,392 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 16:14:14,392 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-08 16:14:14,398 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 16:17:12,466 (trainer:732) INFO: 27epoch:train:1601-1700batch: iter_time=1.393, forward_time=0.146, loss_ctc=72.251, loss_att=57.571, acc=0.694, loss=61.975, backward_time=1.037, grad_norm=118.951, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.048e-05, train_time=5.918
+[gpub005:0/64] 2023-07-08 16:19:29,351 (trainer:732) INFO: 27epoch:train:1701-1800batch: iter_time=1.086e-04, forward_time=0.147, loss_ctc=72.839, loss_att=54.944, acc=0.702, loss=60.312, backward_time=1.033, grad_norm=123.622, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.046e-05, train_time=2.737
+[gpub005:0/64] 2023-07-08 16:21:45,108 (trainer:732) INFO: 27epoch:train:1801-1900batch: iter_time=1.087e-04, forward_time=0.145, loss_ctc=61.753, loss_att=49.031, acc=0.707, loss=52.847, backward_time=1.027, grad_norm=99.456, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.045e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 16:24:00,983 (trainer:732) INFO: 27epoch:train:1901-2000batch: iter_time=1.038e-04, forward_time=0.145, loss_ctc=79.566, loss_att=60.500, acc=0.699, loss=66.220, backward_time=1.028, grad_norm=103.999, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.044e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 16:26:17,002 (trainer:732) INFO: 27epoch:train:2001-2100batch: iter_time=1.062e-04, forward_time=0.146, loss_ctc=79.269, loss_att=61.205, acc=0.700, loss=66.624, backward_time=1.029, grad_norm=134.425, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.042e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 16:28:33,016 (trainer:732) INFO: 27epoch:train:2101-2200batch: iter_time=1.090e-04, forward_time=0.146, loss_ctc=68.277, loss_att=59.362, acc=0.687, loss=62.036, backward_time=1.029, grad_norm=98.602, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.041e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 16:30:48,891 (trainer:732) INFO: 27epoch:train:2201-2300batch: iter_time=1.125e-04, forward_time=0.146, loss_ctc=75.472, loss_att=52.615, acc=0.695, loss=59.472, backward_time=1.028, grad_norm=107.785, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.039e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 16:33:04,669 (trainer:732) INFO: 27epoch:train:2301-2400batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=65.943, loss_att=50.458, acc=0.702, loss=55.104, backward_time=1.027, grad_norm=108.478, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.038e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 16:35:20,434 (trainer:732) INFO: 27epoch:train:2401-2500batch: iter_time=1.090e-04, forward_time=0.146, loss_ctc=73.144, loss_att=58.864, acc=0.697, loss=63.148, backward_time=1.027, grad_norm=94.710, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.037e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 16:35:23,306 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-08 16:35:41,386 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 16:35:44,939 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 16:35:44,939 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-08 16:35:44,946 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 16:41:04,598 (trainer:732) INFO: 27epoch:train:2501-2600batch: iter_time=1.263, forward_time=0.173, loss_ctc=80.201, loss_att=57.657, acc=0.706, loss=64.420, backward_time=1.041, grad_norm=123.403, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.184, optim0_lr0=7.035e-05, train_time=6.882
+[gpub005:0/64] 2023-07-08 16:43:21,200 (trainer:732) INFO: 27epoch:train:2601-2700batch: iter_time=1.166e-04, forward_time=0.148, loss_ctc=63.267, loss_att=46.459, acc=0.706, loss=51.501, backward_time=1.029, grad_norm=87.589, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.184, optim0_lr0=7.034e-05, train_time=2.732
+[gpub005:0/64] 2023-07-08 16:45:37,296 (trainer:732) INFO: 27epoch:train:2701-2800batch: iter_time=1.226e-04, forward_time=0.148, loss_ctc=68.250, loss_att=52.710, acc=0.702, loss=57.372, backward_time=1.030, grad_norm=105.430, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.184, optim0_lr0=7.032e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 16:47:53,627 (trainer:732) INFO: 27epoch:train:2801-2900batch: iter_time=1.205e-04, forward_time=0.148, loss_ctc=82.041, loss_att=65.147, acc=0.699, loss=70.215, backward_time=1.032, grad_norm=109.986, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.031e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 16:50:09,459 (trainer:732) INFO: 27epoch:train:2901-3000batch: iter_time=1.224e-04, forward_time=0.148, loss_ctc=73.529, loss_att=60.131, acc=0.703, loss=64.150, backward_time=1.028, grad_norm=96.408, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.030e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 16:52:25,206 (trainer:732) INFO: 27epoch:train:3001-3100batch: iter_time=1.059e-04, forward_time=0.147, loss_ctc=60.358, loss_att=49.299, acc=0.695, loss=52.616, backward_time=1.028, grad_norm=102.512, clip=100.000, loss_scale=6.045e+23,
optim_step_time=0.183, optim0_lr0=7.028e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 16:54:41,021 (trainer:732) INFO: 27epoch:train:3101-3200batch: iter_time=1.048e-04, forward_time=0.148, loss_ctc=78.368, loss_att=54.425, acc=0.688, loss=61.608, backward_time=1.028, grad_norm=94.380, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.027e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 16:56:56,755 (trainer:732) INFO: 27epoch:train:3201-3300batch: iter_time=1.043e-04, forward_time=0.147, loss_ctc=68.893, loss_att=56.211, acc=0.703, loss=60.016, backward_time=1.027, grad_norm=106.588, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.026e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 16:57:52,426 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 16:58:10,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 16:58:14,326 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 16:58:14,326 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-08 16:58:14,332 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 17:03:42,753 (trainer:732) INFO: 27epoch:train:3301-3400batch: iter_time=1.708, forward_time=0.146, loss_ctc=79.437, loss_att=60.997, acc=0.686, loss=66.529, backward_time=1.042, grad_norm=148.834, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.024e-05, train_time=8.120 +[gpub005:0/64] 2023-07-08 17:05:58,787 (trainer:732) INFO: 27epoch:train:3401-3500batch: iter_time=1.422e-04, forward_time=0.145, loss_ctc=63.091, loss_att=46.527, acc=0.710, loss=51.497, backward_time=1.026, grad_norm=97.048, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.023e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 17:08:14,662 (trainer:732) INFO: 27epoch:train:3501-3600batch: iter_time=1.409e-04, forward_time=0.146, loss_ctc=65.612, loss_att=50.551, acc=0.702, loss=55.069, backward_time=1.028, grad_norm=90.485, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.021e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 17:10:30,437 (trainer:732) INFO: 27epoch:train:3601-3700batch: iter_time=1.452e-04, forward_time=0.145, loss_ctc=86.543, loss_att=66.397, acc=0.689, loss=72.441, backward_time=1.027, grad_norm=123.487, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.020e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 17:12:46,222 (trainer:732) INFO: 27epoch:train:3701-3800batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=72.226, loss_att=59.576, acc=0.683, loss=63.371, backward_time=1.029, grad_norm=110.341, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.019e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 17:15:01,954 (trainer:732) INFO: 27epoch:train:3801-3900batch: iter_time=1.262e-04, forward_time=0.147, 
loss_ctc=62.765, loss_att=50.118, acc=0.692, loss=53.912, backward_time=1.026, grad_norm=108.059, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.017e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 17:17:17,653 (trainer:732) INFO: 27epoch:train:3901-4000batch: iter_time=1.348e-04, forward_time=0.146, loss_ctc=79.044, loss_att=57.038, acc=0.681, loss=63.640, backward_time=1.027, grad_norm=113.104, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.016e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 17:19:33,486 (trainer:732) INFO: 27epoch:train:4001-4100batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.698, loss_att=54.565, acc=0.698, loss=58.505, backward_time=1.027, grad_norm=87.633, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.014e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 17:21:07,242 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-08 17:21:25,710 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 17:21:29,182 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 17:21:29,182 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-08 17:21:29,188 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 17:25:07,290 (trainer:732) INFO: 27epoch:train:4101-4200batch: iter_time=1.312, forward_time=0.147, loss_ctc=74.376, loss_att=54.791, acc=0.701, loss=60.666, backward_time=1.035, grad_norm=132.213, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.013e-05, train_time=6.676 +[gpub005:0/64] 2023-07-08 17:27:23,791 (trainer:732) INFO: 27epoch:train:4201-4300batch: iter_time=1.339e-04, forward_time=0.147, loss_ctc=70.773, loss_att=52.734, acc=0.694, loss=58.146, backward_time=1.029, grad_norm=107.084, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.012e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 17:29:39,208 (trainer:732) INFO: 27epoch:train:4301-4400batch: iter_time=1.474e-04, forward_time=0.146, loss_ctc=64.956, loss_att=51.106, acc=0.710, loss=55.261, backward_time=1.025, grad_norm=85.824, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.010e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 17:31:59,903 (trainer:732) INFO: 27epoch:train:4401-4500batch: iter_time=1.395e-04, forward_time=0.147, loss_ctc=68.008, loss_att=54.043, acc=0.690, loss=58.233, backward_time=1.031, grad_norm=87.203, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.009e-05, train_time=2.814 +[gpub005:0/64] 2023-07-08 17:34:15,707 (trainer:732) INFO: 27epoch:train:4501-4600batch: iter_time=1.354e-04, forward_time=0.147, loss_ctc=83.914, loss_att=63.789, acc=0.692, loss=69.826, backward_time=1.027, grad_norm=114.171, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.008e-05, train_time=2.716 
+[gpub005:0/64] 2023-07-08 17:36:31,380 (trainer:732) INFO: 27epoch:train:4601-4700batch: iter_time=1.455e-04, forward_time=0.148, loss_ctc=69.598, loss_att=58.292, acc=0.684, loss=61.684, backward_time=1.026, grad_norm=93.514, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.006e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 17:38:47,070 (trainer:732) INFO: 27epoch:train:4701-4800batch: iter_time=1.468e-04, forward_time=0.148, loss_ctc=62.231, loss_att=46.924, acc=0.696, loss=51.516, backward_time=1.027, grad_norm=93.136, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.005e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 17:41:07,645 (trainer:732) INFO: 27epoch:train:4801-4900batch: iter_time=1.199e-04, forward_time=0.146, loss_ctc=74.797, loss_att=53.529, acc=0.688, loss=59.909, backward_time=1.028, grad_norm=106.798, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.003e-05, train_time=2.811
+[gpub005:0/64] 2023-07-08 17:43:23,525 (trainer:732) INFO: 27epoch:train:4901-5000batch: iter_time=9.656e-05, forward_time=0.145, loss_ctc=69.547, loss_att=55.357, acc=0.699, loss=59.614, backward_time=1.027, grad_norm=88.177, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.002e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 17:43:26,168 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-08 17:43:44,478 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 17:43:47,904 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 17:43:47,905 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub005:0/64] 2023-07-08 17:43:47,911 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 17:48:43,183 (trainer:732) INFO: 27epoch:train:5001-5100batch: iter_time=1.300, forward_time=0.156, loss_ctc=81.007, loss_att=59.315, acc=0.698, loss=65.823, backward_time=1.042, grad_norm=126.574, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.001e-05, train_time=6.393
+[gpub005:0/64] 2023-07-08 17:50:59,355 (trainer:732) INFO: 27epoch:train:5101-5200batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=63.117, loss_att=45.507, acc=0.712, loss=50.790, backward_time=1.029, grad_norm=105.642, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.999e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 17:53:15,220 (trainer:732) INFO: 27epoch:train:5201-5300batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=66.674, loss_att=51.779, acc=0.705, loss=56.248, backward_time=1.028, grad_norm=93.017, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.998e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 17:55:31,355 (trainer:732) INFO: 27epoch:train:5301-5400batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=80.758, loss_att=65.051, acc=0.690, loss=69.763, backward_time=1.028, grad_norm=115.695, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.997e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 17:57:46,927 (trainer:732) INFO: 27epoch:train:5401-5500batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=74.950, loss_att=60.692, acc=0.688, loss=64.970, backward_time=1.026, grad_norm=100.041, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.995e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 18:00:02,552 (trainer:732) INFO: 27epoch:train:5501-5600batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=60.364, loss_att=48.962, acc=0.693, loss=52.383, backward_time=1.025, grad_norm=89.373, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.994e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 18:02:18,130 (trainer:732) INFO: 27epoch:train:5601-5700batch: iter_time=1.266e-04, forward_time=0.145, loss_ctc=77.107, loss_att=53.744, acc=0.684, loss=60.753, backward_time=1.027, grad_norm=102.369, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.992e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 18:04:33,692 (trainer:732) INFO: 27epoch:train:5701-5800batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=67.366, loss_att=53.893, acc=0.700, loss=57.935, backward_time=1.026, grad_norm=103.715, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.991e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 18:05:27,389 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-08 18:05:45,505 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 18:05:48,945 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 18:05:48,946 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-08 18:05:48,952 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 18:09:46,628 (trainer:732) INFO: 27epoch:train:5801-5900batch: iter_time=1.481, forward_time=0.206, loss_ctc=77.422, loss_att=54.274, acc=0.707, loss=61.219, backward_time=1.053, grad_norm=108.922, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.187, optim0_lr0=6.990e-05, train_time=6.259
+[gpub005:0/64] 2023-07-08 18:12:03,207 (trainer:732) INFO: 27epoch:train:5901-6000batch: iter_time=1.133e-04, forward_time=0.147, loss_ctc=67.668, loss_att=49.007, acc=0.719, loss=54.605, backward_time=1.029, grad_norm=105.149, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.988e-05, train_time=2.731
+[gpub005:0/64] 2023-07-08 18:14:18,983 (trainer:732) INFO: 27epoch:train:6001-6100batch: iter_time=1.019e-04, forward_time=0.146, loss_ctc=65.649, loss_att=52.644, acc=0.704, loss=56.546, backward_time=1.029, grad_norm=82.415, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.987e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 18:16:35,266 (trainer:732) INFO: 27epoch:train:6101-6200batch: iter_time=1.118e-04, forward_time=0.147, loss_ctc=74.745, loss_att=57.661, acc=0.710, loss=62.786, backward_time=1.032, grad_norm=103.839, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.986e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 18:18:51,419 (trainer:732) INFO: 27epoch:train:6201-6300batch: iter_time=1.098e-04, forward_time=0.147, loss_ctc=80.242, loss_att=63.110, acc=0.705, loss=68.249, backward_time=1.031, grad_norm=123.740, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.984e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 18:21:07,449 (trainer:732) INFO: 27epoch:train:6301-6400batch: iter_time=1.090e-04, forward_time=0.147, loss_ctc=64.917, loss_att=56.070, acc=0.689, loss=58.724, backward_time=1.029, grad_norm=94.389, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.983e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 18:23:23,293 (trainer:732) INFO: 27epoch:train:6401-6500batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=75.776, loss_att=53.223, acc=0.701, loss=59.989, backward_time=1.026, grad_norm=128.056, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.982e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 18:25:41,985 (trainer:732) INFO: 27epoch:train:6501-6600batch: iter_time=1.252e-04, forward_time=0.146, loss_ctc=63.831, loss_att=48.758, acc=0.705, loss=53.280, backward_time=1.030, grad_norm=111.720, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.980e-05, train_time=2.774
+[gpub005:0/64] 2023-07-08 18:27:20,934 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-08 18:27:39,116 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 18:27:42,618 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 18:27:42,618 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-08 18:27:42,625 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 18:31:54,283 (trainer:732) INFO: 27epoch:train:6601-6700batch: iter_time=1.661, forward_time=0.168, loss_ctc=78.094, loss_att=55.859, acc=0.715, loss=62.530, backward_time=1.038, grad_norm=110.280, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.979e-05, train_time=7.445
+[gpub005:0/64] 2023-07-08 18:34:11,650 (trainer:732) INFO: 27epoch:train:6701-6800batch: iter_time=1.183e-04, forward_time=0.147, loss_ctc=71.392, loss_att=52.314, acc=0.710, loss=58.037, backward_time=1.032, grad_norm=109.454, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.977e-05, train_time=2.748
+[gpub005:0/64] 2023-07-08 18:36:27,985 (trainer:732) INFO: 27epoch:train:6801-6900batch: iter_time=1.155e-04, forward_time=0.147, loss_ctc=64.101, loss_att=51.370, acc=0.716, loss=55.189, backward_time=1.029, grad_norm=109.963, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.976e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 18:38:44,206 (trainer:732) INFO: 27epoch:train:6901-7000batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=68.462, loss_att=54.516, acc=0.697, loss=58.700, backward_time=1.029, grad_norm=85.796, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.975e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 18:41:00,068 (trainer:732) INFO: 27epoch:train:7001-7100batch: iter_time=1.081e-04, forward_time=0.146, loss_ctc=84.776, loss_att=62.796, acc=0.708, loss=69.390, backward_time=1.027, grad_norm=105.049, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.973e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 18:43:15,964 (trainer:732) INFO: 27epoch:train:7101-7200batch: iter_time=1.084e-04, forward_time=0.147, loss_ctc=70.156, loss_att=58.438, acc=0.703, loss=61.953, backward_time=1.028, grad_norm=91.886, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.972e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 18:45:31,769 (trainer:732) INFO: 27epoch:train:7201-7300batch: iter_time=1.110e-04, forward_time=0.147, loss_ctc=62.426, loss_att=47.189, acc=0.712, loss=51.760, backward_time=1.027, grad_norm=99.793, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.971e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 18:47:47,563 (trainer:732) INFO: 27epoch:train:7301-7400batch: iter_time=1.171e-04, forward_time=0.146, loss_ctc=74.215, loss_att=52.281, acc=0.700, loss=58.861, backward_time=1.029, grad_norm=103.237, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.969e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 18:50:03,368 (trainer:732) INFO: 27epoch:train:7401-7500batch: iter_time=1.113e-04, forward_time=0.147, loss_ctc=69.100, loss_att=56.675, acc=0.701, loss=60.403, backward_time=1.028, grad_norm=93.451, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.968e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 18:50:06,411 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-08 18:50:24,992 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 18:50:28,681 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 18:50:28,681 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-08 18:50:28,687 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 18:56:41,552 (trainer:732) INFO: 27epoch:train:7501-7600batch: iter_time=1.468, forward_time=0.146, loss_ctc=79.702, loss_att=58.690, acc=0.704, loss=64.993, backward_time=1.043, grad_norm=117.599, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.967e-05, train_time=7.963
+[gpub005:0/64] 2023-07-08 18:58:59,435 (trainer:732) INFO: 27epoch:train:7601-7700batch: iter_time=1.165e-04, forward_time=0.146, loss_ctc=62.698, loss_att=45.702, acc=0.715, loss=50.801, backward_time=1.030, grad_norm=86.375, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.965e-05, train_time=2.758
+[gpub005:0/64] 2023-07-08 19:01:20,171 (trainer:732) INFO: 27epoch:train:7701-7800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=68.524, loss_att=51.985, acc=0.704, loss=56.947, backward_time=1.041, grad_norm=108.704, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.964e-05, train_time=2.815
+[gpub005:0/64] 2023-07-08 19:03:36,317 (trainer:732) INFO: 27epoch:train:7801-7900batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=82.146, loss_att=67.111, acc=0.692, loss=71.621, backward_time=1.030, grad_norm=116.046, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.963e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 19:05:52,107 (trainer:732) INFO: 27epoch:train:7901-8000batch: iter_time=1.352e-04, forward_time=0.146, loss_ctc=71.994, loss_att=57.649, acc=0.695, loss=61.952, backward_time=1.025, grad_norm=131.857, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.961e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 19:08:09,009 (trainer:732) INFO: 27epoch:train:8001-8100batch: iter_time=1.230e-04, forward_time=0.147, loss_ctc=61.132, loss_att=49.614, acc=0.689, loss=53.069, backward_time=1.032, grad_norm=103.490, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.960e-05, train_time=2.738
+[gpub005:0/64] 2023-07-08 19:10:25,679 (trainer:732) INFO: 27epoch:train:8101-8200batch: iter_time=1.177e-04, forward_time=0.144, loss_ctc=77.024, loss_att=54.171, acc=0.687, loss=61.027, backward_time=1.027, grad_norm=100.882, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.959e-05, train_time=2.733
+[gpub005:0/64] 2023-07-08 19:12:41,248 (trainer:732) INFO: 27epoch:train:8201-8300batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=66.224, loss_att=53.289, acc=0.703, loss=57.170, backward_time=1.025, grad_norm=103.207, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.957e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 19:13:32,010 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-08 19:13:50,361 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 19:13:54,092 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 19:13:54,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-08 19:13:54,099 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 19:19:15,593 (trainer:732) INFO: 27epoch:train:8301-8400batch: iter_time=1.514, forward_time=0.147, loss_ctc=77.360, loss_att=60.550, acc=0.688, loss=65.593, backward_time=1.044, grad_norm=115.246, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.956e-05, train_time=7.887
+[gpub005:0/64] 2023-07-08 19:21:32,453 (trainer:732) INFO: 27epoch:train:8401-8500batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=63.582, loss_att=45.791, acc=0.714, loss=51.128, backward_time=1.026, grad_norm=87.132, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.954e-05, train_time=2.737
+[gpub005:0/64] 2023-07-08 19:23:49,755 (trainer:732) INFO: 27epoch:train:8501-8600batch: iter_time=1.181e-04, forward_time=0.145, loss_ctc=68.442, loss_att=51.771, acc=0.700, loss=56.772, backward_time=1.029, grad_norm=94.365, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.953e-05, train_time=2.746
+[gpub005:0/64] 2023-07-08 19:26:06,069 (trainer:732) INFO: 27epoch:train:8601-8700batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=82.786, loss_att=63.906, acc=0.692, loss=69.570, backward_time=1.027, grad_norm=126.559, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.952e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 19:28:22,597 (trainer:732) INFO: 27epoch:train:8701-8800batch: iter_time=1.244e-04, forward_time=0.147, loss_ctc=70.784, loss_att=58.190, acc=0.689, loss=61.968, backward_time=1.029, grad_norm=121.368, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.184, optim0_lr0=6.950e-05, train_time=2.730
+[gpub005:0/64] 2023-07-08 19:30:38,374 (trainer:732) INFO: 27epoch:train:8801-8900batch: iter_time=1.201e-04, forward_time=0.147, loss_ctc=60.373, loss_att=48.428, acc=0.694, loss=52.011, backward_time=1.028, grad_norm=99.372, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.184, optim0_lr0=6.949e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 19:32:53,800 (trainer:732) INFO: 27epoch:train:8901-9000batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=76.119, loss_att=54.793, acc=0.685, loss=61.191, backward_time=1.026, grad_norm=101.698, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.948e-05, train_time=2.708
+[gpub005:0/64] 2023-07-08 19:35:09,452 (trainer:732) INFO: 27epoch:train:9001-9100batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=67.817, loss_att=54.263, acc=0.701, loss=58.329, backward_time=1.028, grad_norm=89.440, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.946e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 19:36:41,580 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-08 19:36:59,716 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 19:37:03,195 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 19:37:03,195 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-08 19:37:03,201 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 19:41:40,242 (trainer:732) INFO: 27epoch:train:9101-9200batch: iter_time=1.268, forward_time=0.157, loss_ctc=74.223, loss_att=53.143, acc=0.708, loss=59.467, backward_time=1.037, grad_norm=111.466, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.184, optim0_lr0=6.945e-05, train_time=7.816
+[gpub005:0/64] 2023-07-08 19:43:56,604 (trainer:732) INFO: 27epoch:train:9201-9300batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=71.313, loss_att=52.868, acc=0.710, loss=58.401, backward_time=1.029, grad_norm=113.150, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.944e-05, train_time=2.727
+[gpub005:0/64] 2023-07-08 19:46:13,420 (trainer:732) INFO: 27epoch:train:9301-9400batch: iter_time=1.309e-04, forward_time=0.145, loss_ctc=64.237, loss_att=50.871, acc=0.717, loss=54.881, backward_time=1.026, grad_norm=98.725, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.942e-05, train_time=2.736
+[gpub005:0/64] 2023-07-08 19:48:30,885 (trainer:732) INFO: 27epoch:train:9401-9500batch: iter_time=1.306e-04, forward_time=0.145, loss_ctc=67.251, loss_att=53.854, acc=0.700, loss=57.873, backward_time=1.028, grad_norm=86.885, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.941e-05, train_time=2.749
+[gpub005:0/64] 2023-07-08 19:50:47,871 (trainer:732) INFO: 27epoch:train:9501-9600batch: iter_time=1.344e-04, forward_time=0.146, loss_ctc=82.998, loss_att=63.271, acc=0.704, loss=69.189, backward_time=1.029, grad_norm=100.681, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.940e-05, train_time=2.740
+[gpub005:0/64] 2023-07-08 19:53:04,040 (trainer:732) INFO: 27epoch:train:9601-9700batch: iter_time=1.091e-04, forward_time=0.147, loss_ctc=69.726, loss_att=58.788, acc=0.700, loss=62.069, backward_time=1.030, grad_norm=100.765, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.938e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 19:55:19,699 (trainer:732) INFO: 27epoch:train:9701-9800batch: iter_time=1.084e-04, forward_time=0.147, loss_ctc=62.256, loss_att=46.889, acc=0.713, loss=51.499, backward_time=1.028, grad_norm=99.634, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.937e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 19:57:35,506 (trainer:732) INFO: 27epoch:train:9801-9900batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=73.700, loss_att=52.435, acc=0.704, loss=58.814, backward_time=1.028, grad_norm=105.162, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.936e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 19:59:50,966 (trainer:732) INFO: 27epoch:train:9901-10000batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=69.244, loss_att=56.460, acc=0.701, loss=60.295, backward_time=1.026, grad_norm=95.619, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.934e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 20:13:31,090 (trainer:338) INFO: 27epoch results: [train] iter_time=0.175, forward_time=0.148, loss_ctc=71.614, loss_att=55.101, acc=0.698, loss=60.055, backward_time=1.030, grad_norm=105.167, clip=100.000, loss_scale=7.858e+23, optim_step_time=0.183, optim0_lr0=7.002e-05, train_time=3.238, time=4 hours, 30 minutes and 8.18 seconds, total_count=240000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=48.741, cer_ctc=0.274, loss_att=40.713, acc=0.655, cer=0.437, wer=1.000, loss=43.121, time=7 minutes and 6.84 seconds, total_count=24794, gpu_max_cached_mem_GB=38.234, [att_plot] time=6 minutes and 18.07 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-08 20:13:46,534 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-08 20:13:46,675 (trainer:272) INFO: 28/30epoch started. Estimated time to finish: 14 hours, 26 minutes and 36.64 seconds
+[gpub005:0/64] 2023-07-08 20:13:46,742 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-08 20:14:06,778 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 20:14:10,729 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 20:14:10,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-08 20:14:10,758 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 20:20:40,880 (trainer:732) INFO: 28epoch:train:1-100batch: iter_time=2.719, forward_time=0.161, loss_ctc=74.009, loss_att=55.912, acc=0.677, loss=61.341, backward_time=1.048, grad_norm=106.864, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.187, optim0_lr0=6.933e-05, train_time=8.284
+[gpub005:0/64] 2023-07-08 20:22:58,189 (trainer:732) INFO: 28epoch:train:101-200batch: iter_time=1.283e-04, forward_time=0.146, loss_ctc=77.783, loss_att=59.524, acc=0.688, loss=65.002, backward_time=1.028, grad_norm=110.041, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.932e-05, train_time=2.746
+[gpub005:0/64] 2023-07-08 20:25:14,175 (trainer:732) INFO: 28epoch:train:201-300batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=78.368, loss_att=57.773, acc=0.689, loss=63.952, backward_time=1.027, grad_norm=110.026, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.930e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 20:27:33,894 (trainer:732) INFO: 28epoch:train:301-400batch: iter_time=1.215e-04, forward_time=0.166, loss_ctc=86.210, loss_att=62.958, acc=0.681, loss=69.933, backward_time=1.033, grad_norm=125.836, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.185, optim0_lr0=6.929e-05, train_time=2.794
+[gpub005:0/64] 2023-07-08 20:29:54,389 (trainer:732) INFO: 28epoch:train:401-500batch: iter_time=1.213e-04, forward_time=0.144, loss_ctc=68.699, loss_att=49.263, acc=0.703, loss=55.094, backward_time=1.030, grad_norm=113.824, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.928e-05, train_time=2.810
+[gpub005:0/64] 2023-07-08 20:32:17,226 (trainer:732) INFO: 28epoch:train:501-600batch: iter_time=1.182e-04, forward_time=0.145, loss_ctc=56.148, loss_att=39.661, acc=0.713, loss=44.607, backward_time=1.035, grad_norm=83.985, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.926e-05, train_time=2.857
+[gpub005:0/64] 2023-07-08 20:34:34,235 (trainer:732) INFO: 28epoch:train:601-700batch: iter_time=1.286e-04, forward_time=0.144, loss_ctc=72.009, loss_att=54.099, acc=0.698, loss=59.472, backward_time=1.032, grad_norm=134.899, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.925e-05, train_time=2.740
+[gpub005:0/64] 2023-07-08 20:36:55,642 (trainer:732) INFO: 28epoch:train:701-800batch: iter_time=1.265e-04, forward_time=0.144, loss_ctc=71.921, loss_att=54.806, acc=0.689, loss=59.941, backward_time=1.031, grad_norm=100.285, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.924e-05, train_time=2.828
+[gpub005:0/64] 2023-07-08 20:37:44,836 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-08 20:38:02,541 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 20:38:06,124 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 20:38:06,124 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-08 20:38:06,130 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 20:41:59,143 (trainer:732) INFO: 28epoch:train:801-900batch: iter_time=1.278, forward_time=0.167, loss_ctc=83.852, loss_att=67.054, acc=0.679, loss=72.094, backward_time=1.041, grad_norm=123.747, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.922e-05, train_time=6.070 +[gpub005:0/64] 2023-07-08 20:44:16,329 (trainer:732) INFO: 28epoch:train:901-1000batch: iter_time=1.260e-04, forward_time=0.146, loss_ctc=72.535, loss_att=53.389, acc=0.704, loss=59.133, backward_time=1.032, grad_norm=104.932, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.921e-05, train_time=2.743 +[gpub005:0/64] 2023-07-08 20:46:32,836 (trainer:732) INFO: 28epoch:train:1001-1100batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=81.503, loss_att=62.255, acc=0.693, loss=68.029, backward_time=1.030, grad_norm=104.731, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.920e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 20:48:49,066 (trainer:732) INFO: 28epoch:train:1101-1200batch: iter_time=1.186e-04, forward_time=0.145, loss_ctc=82.031, loss_att=62.754, acc=0.695, loss=68.537, backward_time=1.030, grad_norm=126.182, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.918e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 20:51:04,740 (trainer:732) INFO: 28epoch:train:1201-1300batch: iter_time=1.241e-04, forward_time=0.145, loss_ctc=68.969, loss_att=49.412, acc=0.698, loss=55.279, backward_time=1.027, grad_norm=110.693, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.917e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 20:53:20,512 (trainer:732) INFO: 28epoch:train:1301-1400batch: iter_time=1.139e-04, forward_time=0.145, loss_ctc=63.955, loss_att=43.837, acc=0.724, loss=49.873, backward_time=1.027, grad_norm=86.116, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.916e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 20:55:36,243 (trainer:732) INFO: 28epoch:train:1401-1500batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=62.174, loss_att=44.982, acc=0.721, loss=50.140, backward_time=1.029, grad_norm=99.535, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.914e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 20:57:52,044 (trainer:732) INFO: 28epoch:train:1501-1600batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=75.891, loss_att=56.730, acc=0.703, loss=62.479, backward_time=1.029, grad_norm=135.198, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.913e-05, 
train_time=2.716 +[gpub005:0/64] 2023-07-08 20:59:33,182 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-08 20:59:51,725 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 20:59:55,498 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 20:59:55,498 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-08 20:59:55,504 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 21:03:46,841 (trainer:732) INFO: 28epoch:train:1601-1700batch: iter_time=1.392, forward_time=0.151, loss_ctc=80.220, loss_att=63.955, acc=0.696, loss=68.834, backward_time=1.039, grad_norm=127.333, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.189, optim0_lr0=6.912e-05, train_time=7.095 +[gpub005:0/64] 2023-07-08 21:06:03,098 (trainer:732) INFO: 28epoch:train:1701-1800batch: iter_time=1.185e-04, forward_time=0.146, loss_ctc=72.712, loss_att=55.287, acc=0.677, loss=60.514, backward_time=1.029, grad_norm=100.144, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.910e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 21:08:19,000 (trainer:732) INFO: 28epoch:train:1801-1900batch: iter_time=1.228e-04, forward_time=0.144, loss_ctc=76.947, loss_att=57.906, acc=0.692, loss=63.618, backward_time=1.027, grad_norm=115.473, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.909e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 21:10:35,710 (trainer:732) INFO: 28epoch:train:1901-2000batch: iter_time=1.219e-04, forward_time=0.147, loss_ctc=76.026, loss_att=56.343, acc=0.691, loss=62.248, backward_time=1.031, grad_norm=107.646, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.908e-05, train_time=2.734 +[gpub005:0/64] 2023-07-08 21:12:54,818 (trainer:732) INFO: 28epoch:train:2001-2100batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=83.541, loss_att=60.894, acc=0.684, loss=67.688, backward_time=1.030, grad_norm=115.416, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.907e-05, train_time=2.782 +[gpub005:0/64] 2023-07-08 21:15:20,099 (trainer:732) INFO: 28epoch:train:2101-2200batch: iter_time=1.214e-04, forward_time=0.145, loss_ctc=67.313, loss_att=48.446, acc=0.713, loss=54.106, backward_time=1.040, grad_norm=89.259, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.905e-05, train_time=2.905 +[gpub005:0/64] 2023-07-08 21:17:35,563 (trainer:732) INFO: 28epoch:train:2201-2300batch: iter_time=1.140e-04, forward_time=0.145, loss_ctc=54.785, loss_att=40.768, acc=0.708, loss=44.973, backward_time=1.026, grad_norm=106.678, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.904e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 21:19:51,268 (trainer:732) INFO: 28epoch:train:2301-2400batch: iter_time=1.059e-04, forward_time=0.146, loss_ctc=70.891, loss_att=51.557, acc=0.706, 
loss=57.357, backward_time=1.028, grad_norm=101.405, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.903e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 21:22:06,949 (trainer:732) INFO: 28epoch:train:2401-2500batch: iter_time=1.033e-04, forward_time=0.146, loss_ctc=72.815, loss_att=54.854, acc=0.693, loss=60.242, backward_time=1.028, grad_norm=90.955, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.901e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 21:22:16,616 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-08 21:22:34,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 21:22:38,318 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 21:22:38,318 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-08 21:22:38,324 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 21:28:14,931 (trainer:732) INFO: 28epoch:train:2501-2600batch: iter_time=2.263, forward_time=0.151, loss_ctc=73.063, loss_att=56.968, acc=0.690, loss=61.797, backward_time=1.043, grad_norm=109.171, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.900e-05, train_time=7.359 +[gpub005:0/64] 2023-07-08 21:30:31,457 (trainer:732) INFO: 28epoch:train:2601-2700batch: iter_time=1.187e-04, forward_time=0.145, loss_ctc=72.709, loss_att=55.640, acc=0.701, loss=60.761, backward_time=1.030, grad_norm=110.117, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.899e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 21:32:47,437 (trainer:732) INFO: 28epoch:train:2701-2800batch: iter_time=1.193e-04, forward_time=0.148, loss_ctc=81.634, loss_att=59.340, acc=0.699, loss=66.029, backward_time=1.029, grad_norm=103.358, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.897e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 21:35:03,491 (trainer:732) INFO: 28epoch:train:2801-2900batch: iter_time=1.088e-04, forward_time=0.147, loss_ctc=80.592, loss_att=60.457, acc=0.699, loss=66.498, backward_time=1.031, grad_norm=117.781, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.896e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 21:37:19,461 (trainer:732) INFO: 28epoch:train:2901-3000batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=68.204, loss_att=49.213, acc=0.699, loss=54.910, backward_time=1.029, grad_norm=90.669, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.895e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 21:39:35,137 (trainer:732) INFO: 28epoch:train:3001-3100batch: iter_time=1.116e-04, forward_time=0.147, loss_ctc=62.732, loss_att=41.881, acc=0.732, loss=48.137, backward_time=1.028, grad_norm=92.578, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.893e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 21:41:50,956 (trainer:732) 
INFO: 28epoch:train:3101-3200batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=63.201, loss_att=47.922, acc=0.716, loss=52.506, backward_time=1.028, grad_norm=82.434, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.892e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 21:44:06,741 (trainer:732) INFO: 28epoch:train:3201-3300batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=73.855, loss_att=52.520, acc=0.715, loss=58.920, backward_time=1.028, grad_norm=95.321, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.891e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 21:44:56,339 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 21:45:14,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 21:45:18,307 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 21:45:18,308 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-08 21:45:18,314 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 21:49:50,797 (trainer:732) INFO: 28epoch:train:3301-3400batch: iter_time=1.410, forward_time=0.147, loss_ctc=67.104, loss_att=52.257, acc=0.697, loss=56.711, backward_time=1.043, grad_norm=105.434, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.889e-05, train_time=6.881 +[gpub005:0/64] 2023-07-08 21:52:07,052 (trainer:732) INFO: 28epoch:train:3401-3500batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=73.493, loss_att=55.952, acc=0.687, loss=61.214, backward_time=1.028, grad_norm=92.809, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.888e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 21:54:22,857 (trainer:732) INFO: 28epoch:train:3501-3600batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=81.704, loss_att=61.798, acc=0.682, loss=67.770, backward_time=1.027, grad_norm=108.375, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.887e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 21:56:38,614 (trainer:732) INFO: 28epoch:train:3601-3700batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=77.647, loss_att=60.096, acc=0.700, loss=65.361, backward_time=1.026, grad_norm=106.625, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.886e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 21:58:54,236 (trainer:732) INFO: 28epoch:train:3701-3800batch: iter_time=1.156e-04, forward_time=0.145, loss_ctc=74.147, loss_att=50.797, acc=0.688, loss=57.802, backward_time=1.026, grad_norm=102.912, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.884e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 22:01:09,914 (trainer:732) INFO: 28epoch:train:3801-3900batch: iter_time=1.187e-04, forward_time=0.144, loss_ctc=65.999, loss_att=48.726, acc=0.715, loss=53.908, backward_time=1.028, grad_norm=94.109, clip=100.000, 
+[gpub005:0/64] 2023-07-08 22:01:09,914 (trainer:732) INFO: 28epoch:train:3801-3900batch: iter_time=1.187e-04, forward_time=0.144, loss_ctc=65.999, loss_att=48.726, acc=0.715, loss=53.908, backward_time=1.028, grad_norm=94.109, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.883e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 22:03:25,396 (trainer:732) INFO: 28epoch:train:3901-4000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=57.597, loss_att=41.269, acc=0.713, loss=46.168, backward_time=1.026, grad_norm=85.071, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.882e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 22:05:41,244 (trainer:732) INFO: 28epoch:train:4001-4100batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=76.578, loss_att=55.734, acc=0.705, loss=61.987, backward_time=1.029, grad_norm=122.779, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.182, optim0_lr0=6.880e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 22:07:13,468 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-08 22:07:31,740 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 22:07:35,467 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 22:07:35,467 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-08 22:07:35,473 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 22:11:06,603 (trainer:732) INFO: 28epoch:train:4101-4200batch: iter_time=1.257, forward_time=0.146, loss_ctc=69.702, loss_att=51.949, acc=0.705, loss=57.275, backward_time=1.039, grad_norm=91.533, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.182, optim0_lr0=6.879e-05, train_time=6.507
+[gpub005:0/64] 2023-07-08 22:13:23,067 (trainer:732) INFO: 28epoch:train:4201-4300batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=72.398, loss_att=55.641, acc=0.682, loss=60.668, backward_time=1.030, grad_norm=93.124, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.878e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 22:15:38,757 (trainer:732) INFO: 28epoch:train:4301-4400batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=75.659, loss_att=56.297, acc=0.700, loss=62.106, backward_time=1.027, grad_norm=100.781, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.876e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 22:17:54,550 (trainer:732) INFO: 28epoch:train:4401-4500batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=76.934, loss_att=56.013, acc=0.694, loss=62.289, backward_time=1.027, grad_norm=103.762, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.875e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 22:20:10,415 (trainer:732) INFO: 28epoch:train:4501-4600batch: iter_time=1.173e-04, forward_time=0.146, loss_ctc=82.080, loss_att=60.106, acc=0.689, loss=66.698, backward_time=1.028, grad_norm=118.848, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.874e-05, train_time=2.717
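The loss_scale column doubles from 2.418e+24 to 4.836e+24 at batch 4001-4100 above. This is dynamic loss scaling for mixed-precision training: the scale grows geometrically after a fixed run of overflow-free steps and is cut back when gradients overflow. A sketch with torch.cuda.amp.GradScaler-style defaults (the actual scaler and its hyperparameters are not visible in this log):

    class DynamicScaler:
        """Toy dynamic loss scaler; mirrors GradScaler's update rule, not ESPnet code."""

        def __init__(self, scale=2.0 ** 16, growth_factor=2.0,
                     backoff_factor=0.5, growth_interval=2000):
            self.scale = scale
            self.growth_factor = growth_factor
            self.backoff_factor = backoff_factor
            self.growth_interval = growth_interval
            self._good_steps = 0

        def update(self, found_inf: bool) -> None:
            if found_inf:                        # overflow: shrink, restart the count
                self.scale *= self.backoff_factor
                self._good_steps = 0
            else:
                self._good_steps += 1
                if self._good_steps >= self.growth_interval:
                    self.scale *= self.growth_factor   # e.g. 2.418e+24 -> 4.836e+24
                    self._good_steps = 0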
+[gpub005:0/64] 2023-07-08 22:22:26,037 (trainer:732) INFO: 28epoch:train:4601-4700batch: iter_time=1.057e-04, forward_time=0.146, loss_ctc=66.639, loss_att=48.499, acc=0.718, loss=53.941, backward_time=1.027, grad_norm=90.376, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.873e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 22:24:41,674 (trainer:732) INFO: 28epoch:train:4701-4800batch: iter_time=1.013e-04, forward_time=0.147, loss_ctc=54.555, loss_att=40.462, acc=0.714, loss=44.690, backward_time=1.027, grad_norm=90.199, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.871e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 22:26:57,140 (trainer:732) INFO: 28epoch:train:4801-4900batch: iter_time=1.256e-04, forward_time=0.144, loss_ctc=69.488, loss_att=50.570, acc=0.708, loss=56.246, backward_time=1.025, grad_norm=110.521, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.870e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 22:29:26,869 (trainer:732) INFO: 28epoch:train:4901-5000batch: iter_time=1.214e-04, forward_time=0.173, loss_ctc=72.734, loss_att=55.263, acc=0.695, loss=60.504, backward_time=1.071, grad_norm=108.435, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.182, optim0_lr0=6.869e-05, train_time=2.994
+[gpub005:0/64] 2023-07-08 22:29:31,147 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-08 22:29:49,288 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 22:29:52,760 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 22:29:52,760 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-08 22:29:52,766 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 22:35:12,143 (trainer:732) INFO: 28epoch:train:5001-5100batch: iter_time=1.294, forward_time=0.148, loss_ctc=72.245, loss_att=56.263, acc=0.695, loss=61.058, backward_time=1.050, grad_norm=99.183, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.867e-05, train_time=6.905
+[gpub005:0/64] 2023-07-08 22:37:28,422 (trainer:732) INFO: 28epoch:train:5101-5200batch: iter_time=1.092e-04, forward_time=0.146, loss_ctc=72.485, loss_att=56.428, acc=0.701, loss=61.245, backward_time=1.031, grad_norm=105.729, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.866e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 22:39:44,453 (trainer:732) INFO: 28epoch:train:5201-5300batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=82.505, loss_att=60.573, acc=0.698, loss=67.153, backward_time=1.030, grad_norm=117.962, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.865e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 22:42:00,678 (trainer:732) INFO: 28epoch:train:5301-5400batch: iter_time=1.214e-04, forward_time=0.147, loss_ctc=77.682, loss_att=59.365, acc=0.704, loss=64.860, backward_time=1.031, grad_norm=108.475, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.863e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 22:44:16,338 (trainer:732) INFO: 28epoch:train:5401-5500batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=70.316, loss_att=50.612, acc=0.699, loss=56.524, backward_time=1.028, grad_norm=95.879, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.862e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 22:46:32,135 (trainer:732) INFO: 28epoch:train:5501-5600batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=59.824, loss_att=40.367, acc=0.741, loss=46.204, backward_time=1.028, grad_norm=86.453, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.861e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 22:48:48,069 (trainer:732) INFO: 28epoch:train:5601-5700batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=62.478, loss_att=47.072, acc=0.721, loss=51.693, backward_time=1.028, grad_norm=97.361, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.860e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 22:51:03,830 (trainer:732) INFO: 28epoch:train:5701-5800batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=73.749, loss_att=52.299, acc=0.714, loss=58.734, backward_time=1.027, grad_norm=105.786, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.858e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 22:51:51,144 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-08 22:52:09,396 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 22:52:12,905 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 22:52:12,905 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-08 22:52:12,911 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 22:58:01,898 (trainer:732) INFO: 28epoch:train:5801-5900batch: iter_time=1.311, forward_time=0.146, loss_ctc=72.764, loss_att=56.765, acc=0.696, loss=61.565, backward_time=1.052, grad_norm=112.026, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.857e-05, train_time=8.361
+[gpub005:0/64] 2023-07-08 23:00:18,123 (trainer:732) INFO: 28epoch:train:5901-6000batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=71.428, loss_att=53.963, acc=0.702, loss=59.203, backward_time=1.029, grad_norm=114.929, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.856e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 23:02:33,896 (trainer:732) INFO: 28epoch:train:6001-6100batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=79.868, loss_att=61.221, acc=0.697, loss=66.815, backward_time=1.028, grad_norm=100.067, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.854e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 23:04:49,978 (trainer:732) INFO: 28epoch:train:6101-6200batch: iter_time=1.071e-04, forward_time=0.146, loss_ctc=78.402, loss_att=59.583, acc=0.701, loss=65.229, backward_time=1.030, grad_norm=112.131, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.853e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 23:07:05,436 (trainer:732) INFO: 28epoch:train:6201-6300batch: iter_time=1.087e-04, forward_time=0.144, loss_ctc=67.248, loss_att=47.858, acc=0.704, loss=53.675, backward_time=1.026, grad_norm=110.296, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.852e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 23:09:20,706 (trainer:732) INFO: 28epoch:train:6301-6400batch: iter_time=1.095e-04, forward_time=0.145, loss_ctc=63.660, loss_att=43.054, acc=0.732, loss=49.236, backward_time=1.026, grad_norm=83.660, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.851e-05, train_time=2.705
+[gpub005:0/64] 2023-07-08 23:11:36,441 (trainer:732) INFO: 28epoch:train:6401-6500batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=60.251, loss_att=43.848, acc=0.724, loss=48.769, backward_time=1.028, grad_norm=91.709, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.849e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 23:13:52,452 (trainer:732) INFO: 28epoch:train:6501-6600batch: iter_time=1.138e-04, forward_time=0.147, loss_ctc=74.912, loss_att=55.471, acc=0.707, loss=61.303, backward_time=1.030, grad_norm=89.579, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.848e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 23:15:26,907 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-08 23:15:45,143 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 23:15:48,597 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 23:15:48,597 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-08 23:15:48,603 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 23:20:55,660 (trainer:732) INFO: 28epoch:train:6601-6700batch: iter_time=1.286, forward_time=0.153, loss_ctc=79.725, loss_att=63.645, acc=0.703, loss=68.469, backward_time=1.041, grad_norm=117.042, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.185, optim0_lr0=6.847e-05, train_time=8.464
+[gpub005:0/64] 2023-07-08 23:23:21,217 (trainer:732) INFO: 28epoch:train:6701-6800batch: iter_time=1.228e-04, forward_time=0.164, loss_ctc=72.691, loss_att=55.014, acc=0.698, loss=60.317, backward_time=1.046, grad_norm=109.801, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.845e-05, train_time=2.911
+[gpub005:0/64] 2023-07-08 23:25:38,727 (trainer:732) INFO: 28epoch:train:6801-6900batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=75.359, loss_att=57.243, acc=0.704, loss=62.678, backward_time=1.029, grad_norm=107.335, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.844e-05, train_time=2.750
+[gpub005:0/64] 2023-07-08 23:27:54,797 (trainer:732) INFO: 28epoch:train:6901-7000batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=74.894, loss_att=54.136, acc=0.713, loss=60.363, backward_time=1.029, grad_norm=108.816, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.843e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 23:30:11,239 (trainer:732) INFO: 28epoch:train:7001-7100batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=81.357, loss_att=58.035, acc=0.695, loss=65.032, backward_time=1.032, grad_norm=149.789, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.842e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 23:32:40,425 (trainer:732) INFO: 28epoch:train:7101-7200batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=66.158, loss_att=46.689, acc=0.720, loss=52.530, backward_time=1.049, grad_norm=111.917, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.840e-05, train_time=2.983
+[gpub005:0/64] 2023-07-08 23:34:58,213 (trainer:732) INFO: 28epoch:train:7201-7300batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=54.101, loss_att=39.666, acc=0.729, loss=43.996, backward_time=1.033, grad_norm=92.941, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.839e-05, train_time=2.756
+[gpub005:0/64] 2023-07-08 23:37:14,151 (trainer:732) INFO: 28epoch:train:7301-7400batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=68.799, loss_att=50.354, acc=0.721, loss=55.888, backward_time=1.029, grad_norm=109.989, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.838e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 23:39:31,461 (trainer:732) INFO: 28epoch:train:7401-7500batch: iter_time=1.039e-04, forward_time=0.145, loss_ctc=72.077, loss_att=54.561, acc=0.706, loss=59.816, backward_time=1.030, grad_norm=91.086, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.836e-05, train_time=2.746
+[gpub005:0/64] 2023-07-08 23:39:34,674 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
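optim0_lr0 shrinks by only about 1e-8 per hundred batches here, which matches inverse-square-root decay long after warmup. Assuming the peak rate 2.5e-4 and the 10k warmup steps encoded in the experiment name, ESPnet's WarmupLR schedule would be (a sketch; the optimizer's internal step count is not printed in these lines):

    def warmup_lr(step: int, peak_lr: float = 2.5e-4, warmup: int = 10000) -> float:
        """Linear warmup to peak_lr, then inverse-sqrt decay; requires step >= 1."""
        return peak_lr * warmup ** 0.5 * min(step ** -0.5, step * warmup ** -1.5)

    # warmup_lr(10000) == 2.5e-4 at the end of warmup; afterwards the rate decays
    # as step**-0.5, which is why the logged value creeps down so slowly near 6.8e-05.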
+[gpub005:0/64] 2023-07-08 23:39:53,142 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 23:39:56,612 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 23:39:56,612 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 23:39:56,618 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 23:45:44,315 (trainer:732) INFO: 28epoch:train:7501-7600batch: iter_time=1.328, forward_time=0.147, loss_ctc=71.380, loss_att=56.723, acc=0.686, loss=61.120, backward_time=1.041, grad_norm=103.229, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.835e-05, train_time=7.457
+[gpub005:0/64] 2023-07-08 23:48:00,073 (trainer:732) INFO: 28epoch:train:7601-7700batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=72.398, loss_att=57.071, acc=0.691, loss=61.669, backward_time=1.027, grad_norm=89.683, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.834e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 23:50:16,768 (trainer:732) INFO: 28epoch:train:7701-7800batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=78.542, loss_att=58.230, acc=0.695, loss=64.323, backward_time=1.027, grad_norm=116.271, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.833e-05, train_time=2.734
+[gpub005:0/64] 2023-07-08 23:52:32,396 (trainer:732) INFO: 28epoch:train:7801-7900batch: iter_time=1.381e-04, forward_time=0.144, loss_ctc=78.828, loss_att=58.826, acc=0.698, loss=64.827, backward_time=1.027, grad_norm=109.064, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.831e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 23:54:48,243 (trainer:732) INFO: 28epoch:train:7901-8000batch: iter_time=1.203e-04, forward_time=0.145, loss_ctc=69.930, loss_att=50.336, acc=0.706, loss=56.214, backward_time=1.027, grad_norm=96.055, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.830e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 23:57:03,582 (trainer:732) INFO: 28epoch:train:8001-8100batch: iter_time=1.285e-04, forward_time=0.145, loss_ctc=59.761, loss_att=41.062, acc=0.728, loss=46.672, backward_time=1.026, grad_norm=94.381, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.829e-05, train_time=2.707
+[gpub005:0/64] 2023-07-08 23:59:19,301 (trainer:732) INFO: 28epoch:train:8101-8200batch: iter_time=1.383e-04, forward_time=0.145, loss_ctc=63.711, loss_att=48.157, acc=0.707, loss=52.823, backward_time=1.029, grad_norm=95.737, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.828e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 00:01:34,627 (trainer:732) INFO: 28epoch:train:8201-8300batch: iter_time=1.262e-04, forward_time=0.144, loss_ctc=74.016, loss_att=52.648, acc=0.706, loss=59.058, backward_time=1.025, grad_norm=98.099, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.826e-05, train_time=2.706
+[gpub005:0/64] 2023-07-09 00:02:20,574 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-09 00:02:39,116 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 00:02:42,968 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 00:02:42,969 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-09 00:02:42,975 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 00:07:17,507 (trainer:732) INFO: 28epoch:train:8301-8400batch: iter_time=1.279, forward_time=0.147, loss_ctc=67.473, loss_att=52.063, acc=0.704, loss=56.686, backward_time=1.045, grad_norm=110.832, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.825e-05, train_time=6.857
+[gpub005:0/64] 2023-07-09 00:09:34,819 (trainer:732) INFO: 28epoch:train:8401-8500batch: iter_time=1.026e-04, forward_time=0.143, loss_ctc=73.300, loss_att=54.385, acc=0.702, loss=60.059, backward_time=1.027, grad_norm=113.571, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.824e-05, train_time=2.746
+[gpub005:0/64] 2023-07-09 00:11:51,672 (trainer:732) INFO: 28epoch:train:8501-8600batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=79.277, loss_att=60.008, acc=0.702, loss=65.788, backward_time=1.029, grad_norm=134.268, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.822e-05, train_time=2.737
+[gpub005:0/64] 2023-07-09 00:14:08,387 (trainer:732) INFO: 28epoch:train:8601-8700batch: iter_time=1.041e-04, forward_time=0.144, loss_ctc=79.016, loss_att=61.352, acc=0.710, loss=66.652, backward_time=1.029, grad_norm=114.222, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.821e-05, train_time=2.734
+[gpub005:0/64] 2023-07-09 00:16:24,162 (trainer:732) INFO: 28epoch:train:8701-8800batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=71.708, loss_att=50.081, acc=0.692, loss=56.569, backward_time=1.027, grad_norm=119.421, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.820e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 00:18:39,832 (trainer:732) INFO: 28epoch:train:8801-8900batch: iter_time=1.068e-04, forward_time=0.145, loss_ctc=63.647, loss_att=45.869, acc=0.730, loss=51.202, backward_time=1.027, grad_norm=81.853, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.819e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 00:21:09,960 (trainer:732) INFO: 28epoch:train:8901-9000batch: iter_time=1.066e-04, forward_time=0.145, loss_ctc=55.394, loss_att=39.466, acc=0.733, loss=44.244, backward_time=1.066, grad_norm=83.283, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.817e-05, train_time=3.002
+[gpub005:0/64] 2023-07-09 00:23:27,121 (trainer:732) INFO: 28epoch:train:9001-9100batch: iter_time=1.032e-04, forward_time=0.146, loss_ctc=77.192, loss_att=57.711, acc=0.712, loss=63.555, backward_time=1.030, grad_norm=122.747, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.816e-05, train_time=2.743
+[gpub005:0/64] 2023-07-09 00:24:58,099 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-09 00:25:16,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 00:25:19,994 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 00:25:19,994 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub005:0/64] 2023-07-09 00:25:20,000 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 00:30:05,437 (trainer:732) INFO: 28epoch:train:9101-9200batch: iter_time=1.262, forward_time=0.153, loss_ctc=66.052, loss_att=48.621, acc=0.718, loss=53.850, backward_time=1.040, grad_norm=88.945, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.815e-05, train_time=7.966
+[gpub005:0/64] 2023-07-09 00:32:27,808 (trainer:732) INFO: 28epoch:train:9201-9300batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=71.873, loss_att=53.639, acc=0.689, loss=59.110, backward_time=1.041, grad_norm=96.280, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.814e-05, train_time=2.847
+[gpub005:0/64] 2023-07-09 00:34:45,342 (trainer:732) INFO: 28epoch:train:9301-9400batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=80.187, loss_att=62.167, acc=0.681, loss=67.573, backward_time=1.028, grad_norm=122.430, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.812e-05, train_time=2.750
+[gpub005:0/64] 2023-07-09 00:37:02,014 (trainer:732) INFO: 28epoch:train:9401-9500batch: iter_time=1.190e-04, forward_time=0.146, loss_ctc=78.718, loss_att=59.859, acc=0.703, loss=65.517, backward_time=1.029, grad_norm=119.060, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.811e-05, train_time=2.733
+[gpub005:0/64] 2023-07-09 00:39:18,308 (trainer:732) INFO: 28epoch:train:9501-9600batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=72.327, loss_att=48.677, acc=0.700, loss=55.772, backward_time=1.028, grad_norm=106.106, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.810e-05, train_time=2.726
+[gpub005:0/64] 2023-07-09 00:41:34,013 (trainer:732) INFO: 28epoch:train:9601-9700batch: iter_time=1.140e-04, forward_time=0.144, loss_ctc=64.789, loss_att=49.014, acc=0.714, loss=53.746, backward_time=1.026, grad_norm=113.313, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.182, optim0_lr0=6.809e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 00:43:49,399 (trainer:732) INFO: 28epoch:train:9701-9800batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=57.198, loss_att=41.033, acc=0.717, loss=45.882, backward_time=1.025, grad_norm=93.064, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.807e-05, train_time=2.707
+[gpub005:0/64] 2023-07-09 00:46:05,002 (trainer:732) INFO: 28epoch:train:9801-9900batch: iter_time=1.168e-04, forward_time=0.144, loss_ctc=73.809, loss_att=54.241, acc=0.702, loss=60.111, backward_time=1.027, grad_norm=105.246, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.806e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 00:48:20,905 (trainer:732) INFO: 28epoch:train:9901-10000batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=79.070, loss_att=61.377, acc=0.697, loss=66.685, backward_time=1.029, grad_norm=105.646, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.805e-05, train_time=2.718
+[gpub005:0/64] 2023-07-09 01:00:59,859 (trainer:338) INFO: 28epoch results: [train] iter_time=0.181, forward_time=0.147, loss_ctc=71.863, loss_att=53.365, acc=0.703, loss=58.915, backward_time=1.032, grad_norm=105.211, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.868e-05, train_time=3.295, time=4 hours, 34 minutes and 54.57 seconds, total_count=250000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=47.311, cer_ctc=0.272, loss_att=40.448, acc=0.654, cer=0.441, wer=1.000, loss=42.507, time=6 minutes and 28.69 seconds, total_count=25806, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 49.92 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-09 01:01:17,688 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-09 01:01:17,806 (trainer:272) INFO: 29/30epoch started. Estimated time to finish: 9 hours, 37 minutes and 17.41 seconds
+[gpub005:0/64] 2023-07-09 01:01:18,821 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-09 01:01:37,314 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 01:01:42,700 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 01:01:42,700 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-09 01:01:42,754 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 01:08:33,856 (trainer:732) INFO: 29epoch:train:1-100batch: iter_time=2.923, forward_time=0.172, loss_ctc=75.895, loss_att=61.514, acc=0.699, loss=65.828, backward_time=1.046, grad_norm=122.053, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.192, optim0_lr0=6.803e-05, train_time=8.710
+[gpub005:0/64] 2023-07-09 01:10:50,462 (trainer:732) INFO: 29epoch:train:101-200batch: iter_time=1.038e-04, forward_time=0.144, loss_ctc=68.461, loss_att=53.461, acc=0.697, loss=57.961, backward_time=1.028, grad_norm=104.908, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.802e-05, train_time=2.732
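Two details of the epoch boundary above are worth unpacking. "The best model has been updated: valid.total_count" uses a criterion that increases monotonically, so that particular line effectively marks the newest checkpoint rather than a quality improvement. And the "Estimated time to finish" is consistent with simply scaling the measured per-epoch wall time by the two remaining epochs:

    from datetime import timedelta

    # Wall-clock figures taken from the "28epoch results" line above.
    per_epoch = (timedelta(hours=4, minutes=34, seconds=54.57)   # [train]
                 + timedelta(minutes=6, seconds=28.69)           # [valid]
                 + timedelta(minutes=5, seconds=49.92))          # [att_plot]
    print(2 * per_epoch)  # 9:34:26.36 for epochs 29-30, near the logged 9:37:17.41
    # (the small gap plausibly covers checkpoint writing and logging overhead)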
+[gpub005:0/64] 2023-07-09 01:13:08,278 (trainer:732) INFO: 29epoch:train:201-300batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=90.315, loss_att=55.873, acc=0.705, loss=66.206, backward_time=1.031, grad_norm=113.891, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.801e-05, train_time=2.756
+[gpub005:0/64] 2023-07-09 01:15:33,011 (trainer:732) INFO: 29epoch:train:301-400batch: iter_time=1.055e-04, forward_time=0.145, loss_ctc=73.221, loss_att=50.232, acc=0.710, loss=57.129, backward_time=1.036, grad_norm=122.026, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.800e-05, train_time=2.894
+[gpub005:0/64] 2023-07-09 01:17:54,852 (trainer:732) INFO: 29epoch:train:401-500batch: iter_time=9.923e-05, forward_time=0.145, loss_ctc=73.934, loss_att=56.527, acc=0.698, loss=61.749, backward_time=1.033, grad_norm=117.019, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.798e-05, train_time=2.837
+[gpub005:0/64] 2023-07-09 01:20:26,236 (trainer:732) INFO: 29epoch:train:501-600batch: iter_time=9.637e-05, forward_time=0.145, loss_ctc=70.822, loss_att=54.513, acc=0.707, loss=59.406, backward_time=1.075, grad_norm=98.241, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.797e-05, train_time=3.027
+[gpub005:0/64] 2023-07-09 01:22:58,108 (trainer:732) INFO: 29epoch:train:601-700batch: iter_time=9.952e-05, forward_time=0.145, loss_ctc=70.652, loss_att=57.641, acc=0.680, loss=61.544, backward_time=1.053, grad_norm=96.985, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.796e-05, train_time=3.037
+[gpub005:0/64] 2023-07-09 01:25:17,769 (trainer:732) INFO: 29epoch:train:701-800batch: iter_time=1.138e-04, forward_time=0.144, loss_ctc=60.087, loss_att=42.384, acc=0.718, loss=47.695, backward_time=1.040, grad_norm=90.361, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.795e-05, train_time=2.793
+[gpub005:0/64] 2023-07-09 01:26:10,853 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-09 01:26:28,595 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 01:26:32,230 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 01:26:32,230 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-09 01:26:32,236 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 01:31:54,364 (trainer:732) INFO: 29epoch:train:801-900batch: iter_time=1.384, forward_time=0.149, loss_ctc=78.227, loss_att=62.233, acc=0.694, loss=67.031, backward_time=1.042, grad_norm=108.631, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.793e-05, train_time=7.932
+[gpub005:0/64] 2023-07-09 01:34:11,688 (trainer:732) INFO: 29epoch:train:901-1000batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=63.851, loss_att=49.602, acc=0.700, loss=53.877, backward_time=1.029, grad_norm=110.254, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.792e-05, train_time=2.746
+[gpub005:0/64] 2023-07-09 01:36:27,600 (trainer:732) INFO: 29epoch:train:1001-1100batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=78.049, loss_att=56.277, acc=0.710, loss=62.809, backward_time=1.029, grad_norm=103.467, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.791e-05, train_time=2.718
+[gpub005:0/64] 2023-07-09 01:38:43,893 (trainer:732) INFO: 29epoch:train:1101-1200batch: iter_time=1.122e-04, forward_time=0.145, loss_ctc=80.017, loss_att=48.167, acc=0.710, loss=57.722, backward_time=1.029, grad_norm=140.589, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.790e-05, train_time=2.726
+[gpub005:0/64] 2023-07-09 01:41:00,282 (trainer:732) INFO: 29epoch:train:1201-1300batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=76.299, loss_att=60.020, acc=0.697, loss=64.904, backward_time=1.032, grad_norm=116.404, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.788e-05, train_time=2.728
+[gpub005:0/64] 2023-07-09 01:43:16,385 (trainer:732) INFO: 29epoch:train:1301-1400batch: iter_time=1.132e-04, forward_time=0.146, loss_ctc=73.892, loss_att=56.992, acc=0.705, loss=62.062, backward_time=1.031, grad_norm=120.717, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.787e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 01:45:32,444 (trainer:732) INFO: 29epoch:train:1401-1500batch: iter_time=1.029e-04, forward_time=0.146, loss_ctc=70.577, loss_att=55.685, acc=0.690, loss=60.153, backward_time=1.030, grad_norm=103.511, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.786e-05, train_time=2.721
+[gpub005:0/64] 2023-07-09 01:47:47,996 (trainer:732) INFO: 29epoch:train:1501-1600batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=63.930, loss_att=50.799, acc=0.697, loss=54.738, backward_time=1.026, grad_norm=103.992, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.785e-05, train_time=2.711
+[gpub005:0/64] 2023-07-09 01:49:20,766 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
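Each iter-factory pairs one speech shard (kaldi_ark) with three parallel text streams: text_prev (previous-segment context for the prompt), text_ctc (the CTC branch target), and text (the decoder target). To inspect a shard outside the trainer, the scp split can be read with kaldiio (assumed installed; the path is the one in the dataset block below):

    import kaldiio

    # Iterate one speech shard; each entry is an (utterance-id, data) pair
    # stored in Kaldi ark format. Depending on how the audio was dumped,
    # data is either a feature array or a (sample_rate, waveform) tuple.
    with kaldiio.ReadHelper(
            "scp:exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9") as reader:
        for utt_id, data in reader:
            print(utt_id, type(data))
            break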
+[gpub005:0/64] 2023-07-09 01:49:38,971 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 01:49:42,688 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 01:49:42,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-09 01:49:42,694 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 01:54:36,572 (trainer:732) INFO: 29epoch:train:1601-1700batch: iter_time=1.342, forward_time=0.168, loss_ctc=63.970, loss_att=46.468, acc=0.710, loss=51.719, backward_time=1.035, grad_norm=113.327, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.184, optim0_lr0=6.783e-05, train_time=8.171
+[gpub005:0/64] 2023-07-09 01:56:56,586 (trainer:732) INFO: 29epoch:train:1701-1800batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=64.314, loss_att=50.068, acc=0.699, loss=54.342, backward_time=1.035, grad_norm=153.292, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.782e-05, train_time=2.800
+[gpub005:0/64] 2023-07-09 01:59:12,539 (trainer:732) INFO: 29epoch:train:1801-1900batch: iter_time=1.046e-04, forward_time=0.147, loss_ctc=74.996, loss_att=54.133, acc=0.713, loss=60.392, backward_time=1.029, grad_norm=101.452, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.781e-05, train_time=2.719
+[gpub005:0/64] 2023-07-09 02:01:28,732 (trainer:732) INFO: 29epoch:train:1901-2000batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=79.906, loss_att=47.196, acc=0.715, loss=57.009, backward_time=1.030, grad_norm=115.619, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.780e-05, train_time=2.724
+[gpub005:0/64] 2023-07-09 02:03:45,806 (trainer:732) INFO: 29epoch:train:2001-2100batch: iter_time=1.086e-04, forward_time=0.148, loss_ctc=73.845, loss_att=58.312, acc=0.700, loss=62.972, backward_time=1.032, grad_norm=112.680, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.778e-05, train_time=2.741
+[gpub005:0/64] 2023-07-09 02:06:01,896 (trainer:732) INFO: 29epoch:train:2101-2200batch: iter_time=1.073e-04, forward_time=0.147, loss_ctc=72.628, loss_att=54.739, acc=0.701, loss=60.106, backward_time=1.030, grad_norm=99.890, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.777e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 02:08:17,780 (trainer:732) INFO: 29epoch:train:2201-2300batch: iter_time=1.062e-04, forward_time=0.146, loss_ctc=69.153, loss_att=52.877, acc=0.697, loss=57.760, backward_time=1.029, grad_norm=124.253, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.776e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 02:10:33,671 (trainer:732) INFO: 29epoch:train:2301-2400batch: iter_time=1.090e-04, forward_time=0.146, loss_ctc=64.791, loss_att=53.066, acc=0.698, loss=56.583, backward_time=1.029, grad_norm=124.055, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.775e-05, train_time=2.718
+[gpub005:0/64] 2023-07-09 02:12:49,411 (trainer:732) INFO: 29epoch:train:2401-2500batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=67.622, loss_att=55.140, acc=0.702, loss=58.884, backward_time=1.028, grad_norm=88.955, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.182, optim0_lr0=6.773e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 02:12:54,375 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-09 02:13:12,234 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 02:13:15,958 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 02:13:15,958 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-09 02:13:15,964 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 02:19:10,372 (trainer:732) INFO: 29epoch:train:2501-2600batch: iter_time=2.404, forward_time=0.165, loss_ctc=66.935, loss_att=50.456, acc=0.706, loss=55.400, backward_time=1.037, grad_norm=106.775, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.185, optim0_lr0=6.772e-05, train_time=7.619
+[gpub005:0/64] 2023-07-09 02:21:26,766 (trainer:732) INFO: 29epoch:train:2601-2700batch: iter_time=1.113e-04, forward_time=0.146, loss_ctc=68.042, loss_att=50.061, acc=0.715, loss=55.456, backward_time=1.030, grad_norm=115.964, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.771e-05, train_time=2.728
+[gpub005:0/64] 2023-07-09 02:23:45,668 (trainer:732) INFO: 29epoch:train:2701-2800batch: iter_time=1.113e-04, forward_time=0.144, loss_ctc=87.303, loss_att=56.217, acc=0.712, loss=65.543, backward_time=1.029, grad_norm=147.226, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.770e-05, train_time=2.778
+[gpub005:0/64] 2023-07-09 02:26:01,790 (trainer:732) INFO: 29epoch:train:2801-2900batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=68.342, loss_att=46.773, acc=0.712, loss=53.244, backward_time=1.029, grad_norm=107.727, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.768e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 02:28:17,591 (trainer:732) INFO: 29epoch:train:2901-3000batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=70.546, loss_att=55.696, acc=0.702, loss=60.151, backward_time=1.028, grad_norm=97.665, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.767e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 02:30:33,597 (trainer:732) INFO: 29epoch:train:3001-3100batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=73.929, loss_att=58.503, acc=0.704, loss=63.131, backward_time=1.029, grad_norm=99.412, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.766e-05, train_time=2.720
+[gpub005:0/64] 2023-07-09 02:32:49,257 (trainer:732) INFO: 29epoch:train:3101-3200batch: iter_time=1.203e-04, forward_time=0.146, loss_ctc=64.078, loss_att=51.000, acc=0.697, loss=54.924, backward_time=1.026, grad_norm=104.136, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.765e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 02:35:05,104 (trainer:732) INFO: 29epoch:train:3201-3300batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=64.099, loss_att=47.265, acc=0.709, loss=52.315, backward_time=1.029, grad_norm=97.522, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.764e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 02:35:54,763 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-09 02:36:12,774 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 02:36:16,518 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 02:36:16,518 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-09 02:36:16,524 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 02:40:55,557 (trainer:732) INFO: 29epoch:train:3301-3400batch: iter_time=1.419, forward_time=0.149, loss_ctc=81.136, loss_att=66.941, acc=0.694, loss=71.200, backward_time=1.068, grad_norm=119.406, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.762e-05, train_time=7.009
+[gpub005:0/64] 2023-07-09 02:43:16,133 (trainer:732) INFO: 29epoch:train:3401-3500batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=63.487, loss_att=49.185, acc=0.710, loss=53.476, backward_time=1.034, grad_norm=98.479, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.761e-05, train_time=2.811
+[gpub005:0/64] 2023-07-09 02:45:32,121 (trainer:732) INFO: 29epoch:train:3501-3600batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=77.025, loss_att=55.124, acc=0.715, loss=61.694, backward_time=1.028, grad_norm=114.670, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.760e-05, train_time=2.720
+[gpub005:0/64] 2023-07-09 02:47:47,521 (trainer:732) INFO: 29epoch:train:3601-3700batch: iter_time=1.214e-04, forward_time=0.145, loss_ctc=76.344, loss_att=46.095, acc=0.716, loss=55.170, backward_time=1.025, grad_norm=108.088, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.759e-05, train_time=2.708
+[gpub005:0/64] 2023-07-09 02:50:03,361 (trainer:732) INFO: 29epoch:train:3701-3800batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=74.359, loss_att=57.845, acc=0.703, loss=62.799, backward_time=1.027, grad_norm=113.639, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.757e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 02:52:18,965 (trainer:732) INFO: 29epoch:train:3801-3900batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=74.601, loss_att=55.697, acc=0.706, loss=61.368, backward_time=1.027, grad_norm=110.806, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.756e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 02:54:49,718 (trainer:732) INFO: 29epoch:train:3901-4000batch: iter_time=1.248e-04, forward_time=0.147, loss_ctc=69.897, loss_att=54.034, acc=0.698, loss=58.793, backward_time=1.058, grad_norm=103.506, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.755e-05, train_time=3.015
+[gpub005:0/64] 2023-07-09 02:57:08,751 (trainer:732) INFO: 29epoch:train:4001-4100batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=63.725, loss_att=50.310, acc=0.701, loss=54.335, backward_time=1.034, grad_norm=87.206, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.754e-05, train_time=2.780
+[gpub005:0/64] 2023-07-09 02:58:49,008 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-09 02:59:07,100 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 02:59:10,536 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 02:59:10,536 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-09 02:59:10,542 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 03:03:56,521 (trainer:732) INFO: 29epoch:train:4101-4200batch: iter_time=1.354, forward_time=0.145, loss_ctc=72.105, loss_att=56.202, acc=0.708, loss=60.973, backward_time=1.040, grad_norm=111.622, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.752e-05, train_time=8.155
+[gpub005:0/64] 2023-07-09 03:06:13,202 (trainer:732) INFO: 29epoch:train:4201-4300batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=69.002, loss_att=53.608, acc=0.690, loss=58.226, backward_time=1.028, grad_norm=99.107, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.751e-05, train_time=2.733
+[gpub005:0/64] 2023-07-09 03:08:28,913 (trainer:732) INFO: 29epoch:train:4301-4400batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=68.222, loss_att=49.890, acc=0.715, loss=55.390, backward_time=1.027, grad_norm=99.071, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.750e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 03:10:44,538 (trainer:732) INFO: 29epoch:train:4401-4500batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=84.780, loss_att=56.108, acc=0.707, loss=64.710, backward_time=1.027, grad_norm=107.348, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.749e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 03:13:02,169 (trainer:732) INFO: 29epoch:train:4501-4600batch: iter_time=1.167e-04, forward_time=0.145, loss_ctc=69.759, loss_att=47.626, acc=0.702, loss=54.266, backward_time=1.029, grad_norm=98.923, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.747e-05, train_time=2.752
+[gpub005:0/64] 2023-07-09 03:15:18,719 (trainer:732) INFO: 29epoch:train:4601-4700batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=70.803, loss_att=56.571, acc=0.695, loss=60.841, backward_time=1.026, grad_norm=104.781, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.746e-05, train_time=2.731
+[gpub005:0/64] 2023-07-09 03:17:34,404 (trainer:732) INFO: 29epoch:train:4701-4800batch: iter_time=1.086e-04, forward_time=0.145, loss_ctc=72.078, loss_att=57.762, acc=0.699, loss=62.057, backward_time=1.027, grad_norm=104.257, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.745e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 03:19:52,007 (trainer:732) INFO: 29epoch:train:4801-4900batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=64.906, loss_att=51.504, acc=0.697, loss=55.525, backward_time=1.030, grad_norm=93.447, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.744e-05, train_time=2.752
+[gpub005:0/64] 2023-07-09 03:22:08,209 (trainer:732) INFO: 29epoch:train:4901-5000batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.847, loss_att=49.021, acc=0.700, loss=53.469, backward_time=1.026, grad_norm=89.197, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.743e-05, train_time=2.724
+[gpub005:0/64] 2023-07-09 03:22:16,127 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-09 03:22:34,650 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 03:22:38,116 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 03:22:38,116 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-09 03:22:38,123 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 03:29:32,011 (trainer:732) INFO: 29epoch:train:5001-5100batch: iter_time=1.438, forward_time=0.167, loss_ctc=66.539, loss_att=51.321, acc=0.694, loss=55.886, backward_time=1.037, grad_norm=101.883, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.184, optim0_lr0=6.741e-05, train_time=8.875
+[gpub005:0/64] 2023-07-09 03:31:48,082 (trainer:732) INFO: 29epoch:train:5101-5200batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=67.465, loss_att=49.395, acc=0.710, loss=54.816, backward_time=1.027, grad_norm=99.778, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.740e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 03:34:04,172 (trainer:732) INFO: 29epoch:train:5201-5300batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=86.270, loss_att=55.902, acc=0.709, loss=65.012, backward_time=1.026, grad_norm=130.441, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.739e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 03:36:19,829 (trainer:732) INFO: 29epoch:train:5301-5400batch: iter_time=1.055e-04, forward_time=0.147, loss_ctc=69.380, loss_att=47.502, acc=0.705, loss=54.065, backward_time=1.026, grad_norm=129.518, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.738e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 03:38:36,943 (trainer:732) INFO: 29epoch:train:5401-5500batch: iter_time=1.046e-04, forward_time=0.148, loss_ctc=69.082, loss_att=55.354, acc=0.697, loss=59.473, backward_time=1.029, grad_norm=113.486, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.736e-05, train_time=2.742
+[gpub005:0/64] 2023-07-09 03:40:53,445 (trainer:732) INFO: 29epoch:train:5501-5600batch: iter_time=1.094e-04, forward_time=0.147, loss_ctc=72.582, loss_att=57.397, acc=0.696, loss=61.952, backward_time=1.029, grad_norm=101.675, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.735e-05, train_time=2.730
+[gpub005:0/64] 2023-07-09 03:43:09,072 (trainer:732) INFO: 29epoch:train:5601-5700batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=64.110, loss_att=50.796, acc=0.696, loss=54.790, backward_time=1.026, grad_norm=95.741, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.734e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 03:45:25,401 (trainer:732) INFO: 29epoch:train:5701-5800batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=63.508, loss_att=48.524, acc=0.705, loss=53.020, backward_time=1.026, grad_norm=92.218, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.733e-05, train_time=2.726
+[gpub005:0/64] 2023-07-09 03:46:27,381 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-09 03:46:45,495 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 03:46:49,010 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 03:46:49,010 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-09 03:46:49,016 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 03:53:18,837 (trainer:732) INFO: 29epoch:train:5801-5900batch: iter_time=2.783, forward_time=0.147, loss_ctc=79.436, loss_att=67.417, acc=0.689, loss=71.022, backward_time=1.047, grad_norm=117.828, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.732e-05, train_time=9.468
+[gpub005:0/64] 2023-07-09 03:55:34,614 (trainer:732) INFO: 29epoch:train:5901-6000batch: iter_time=1.240e-04, forward_time=0.145, loss_ctc=62.841, loss_att=48.708, acc=0.705, loss=52.948, backward_time=1.026, grad_norm=98.596, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.730e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 03:57:50,427 (trainer:732) INFO: 29epoch:train:6001-6100batch: iter_time=1.114e-04, forward_time=0.145, loss_ctc=76.795, loss_att=55.754, acc=0.710, loss=62.066, backward_time=1.024, grad_norm=125.132, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.729e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 04:00:06,297 (trainer:732) INFO: 29epoch:train:6101-6200batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=76.623, loss_att=46.076, acc=0.713, loss=55.240, backward_time=1.026, grad_norm=135.922, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.728e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 04:02:23,184 (trainer:732) INFO: 29epoch:train:6201-6300batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=73.911, loss_att=58.019, acc=0.695, loss=62.787, backward_time=1.028, grad_norm=106.548, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.727e-05, train_time=2.738
+[gpub005:0/64] 2023-07-09 04:04:41,945 (trainer:732) INFO: 29epoch:train:6301-6400batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=73.690, loss_att=55.703, acc=0.702, loss=61.099, backward_time=1.028, grad_norm=92.974, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.725e-05, train_time=2.775
+[gpub005:0/64] 2023-07-09 04:07:02,114 (trainer:732) INFO: 29epoch:train:6401-6500batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=69.683, loss_att=53.819, acc=0.693, loss=58.578, backward_time=1.030, grad_norm=107.754, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.724e-05, train_time=2.803
+[gpub005:0/64] 2023-07-09 04:09:24,271 (trainer:732) INFO: 29epoch:train:6501-6600batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=62.912, loss_att=49.656, acc=0.700, loss=53.633, backward_time=1.051, grad_norm=93.183, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.723e-05, train_time=2.843
+[gpub005:0/64] 2023-07-09 04:10:56,904 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-09 04:11:14,983 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 04:11:18,376 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 04:11:18,376 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-09 04:11:18,383 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 04:15:17,356 (trainer:732) INFO: 29epoch:train:6601-6700batch: iter_time=1.432, forward_time=0.150, loss_ctc=71.020, loss_att=55.879, acc=0.709, loss=60.421, backward_time=1.057, grad_norm=109.386, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.722e-05, train_time=7.061
+[gpub005:0/64] 2023-07-09 04:17:34,269 (trainer:732) INFO: 29epoch:train:6701-6800batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=67.619, loss_att=52.545, acc=0.704, loss=57.067, backward_time=1.032, grad_norm=94.824, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.721e-05, train_time=2.738
+[gpub005:0/64] 2023-07-09 04:19:50,475 (trainer:732) INFO: 29epoch:train:6801-6900batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.948, loss_att=51.387, acc=0.714, loss=56.355, backward_time=1.030, grad_norm=96.289, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.719e-05, train_time=2.724
+[gpub005:0/64] 2023-07-09 04:22:06,494 (trainer:732) INFO: 29epoch:train:6901-7000batch: iter_time=1.305e-04, forward_time=0.147, loss_ctc=84.714, loss_att=54.294, acc=0.723, loss=63.420, backward_time=1.030, grad_norm=101.845, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.718e-05, train_time=2.720
+[gpub005:0/64] 2023-07-09 04:24:22,311 (trainer:732) INFO: 29epoch:train:7001-7100batch: iter_time=1.207e-04, forward_time=0.146, loss_ctc=69.271, loss_att=47.599, acc=0.712, loss=54.101, backward_time=1.026, grad_norm=120.928, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.717e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 04:26:38,042 (trainer:732) INFO: 29epoch:train:7101-7200batch: iter_time=1.078e-04, forward_time=0.145, loss_ctc=69.513, loss_att=54.422, acc=0.710, loss=58.949, backward_time=1.027, grad_norm=110.610, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.716e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 04:28:54,309 (trainer:732) INFO: 29epoch:train:7201-7300batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=72.193, loss_att=57.718, acc=0.709, loss=62.061, backward_time=1.030, grad_norm=102.662, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.715e-05, train_time=2.725
+[gpub005:0/64] 2023-07-09 04:31:09,724 (trainer:732) INFO: 29epoch:train:7301-7400batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=66.020, loss_att=51.633, acc=0.701, loss=55.949, backward_time=1.025, grad_norm=97.883, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.713e-05, train_time=2.708
+[gpub005:0/64] 2023-07-09 04:33:26,175 (trainer:732) INFO: 29epoch:train:7401-7500batch: iter_time=1.074e-04, forward_time=0.147, loss_ctc=62.895, loss_att=47.361, acc=0.711, loss=52.021, backward_time=1.027, grad_norm=95.711, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.712e-05, train_time=2.729
+[gpub005:0/64] 2023-07-09 04:33:28,013 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-09 04:33:46,250 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 04:33:49,686 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 04:33:49,686 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-09 04:33:49,720 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 04:39:35,421 (trainer:732) INFO: 29epoch:train:7501-7600batch: iter_time=1.272, forward_time=0.187, loss_ctc=73.581, loss_att=59.664, acc=0.699, loss=63.839, backward_time=1.051, grad_norm=123.090, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.185, optim0_lr0=6.711e-05, train_time=7.385 +[gpub005:0/64] 2023-07-09 04:41:51,424 (trainer:732) INFO: 29epoch:train:7601-7700batch: iter_time=1.149e-04, forward_time=0.147, loss_ctc=66.484, loss_att=50.773, acc=0.706, loss=55.486, backward_time=1.027, grad_norm=101.249, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.710e-05, train_time=2.720 +[gpub005:0/64] 2023-07-09 04:44:13,765 (trainer:732) INFO: 29epoch:train:7701-7800batch: iter_time=1.341e-04, forward_time=0.146, loss_ctc=84.648, loss_att=54.865, acc=0.710, loss=63.799, backward_time=1.032, grad_norm=114.104, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.709e-05, train_time=2.847 +[gpub005:0/64] 2023-07-09 04:46:32,054 (trainer:732) INFO: 29epoch:train:7801-7900batch: iter_time=1.111e-04, forward_time=0.147, loss_ctc=69.048, loss_att=47.959, acc=0.715, loss=54.286, backward_time=1.040, grad_norm=97.389, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.707e-05, train_time=2.766 +[gpub005:0/64] 2023-07-09 04:48:49,620 (trainer:732) INFO: 29epoch:train:7901-8000batch: iter_time=1.106e-04, forward_time=0.147, loss_ctc=72.274, loss_att=55.314, acc=0.692, loss=60.402, backward_time=1.028, grad_norm=137.366, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.706e-05, train_time=2.751 +[gpub005:0/64] 2023-07-09 04:51:28,616 (trainer:732) INFO: 29epoch:train:8001-8100batch: iter_time=1.309e-04, forward_time=0.147, loss_ctc=69.384, loss_att=53.865, acc=0.708, loss=58.521, backward_time=1.057, grad_norm=96.316, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.705e-05, train_time=3.180 +[gpub005:0/64] 2023-07-09 04:53:45,027 (trainer:732) INFO: 29epoch:train:8101-8200batch: iter_time=1.360e-04, forward_time=0.147, loss_ctc=69.389, loss_att=54.583, acc=0.692, loss=59.025, backward_time=1.030, grad_norm=99.264, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.704e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 04:56:00,319 (trainer:732) INFO: 29epoch:train:8201-8300batch: iter_time=1.142e-04, forward_time=0.143, loss_ctc=58.213, loss_att=41.467, acc=0.724, loss=46.491, backward_time=1.023, grad_norm=83.113, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, 
optim0_lr0=6.702e-05, train_time=2.706 +[gpub005:0/64] 2023-07-09 04:56:51,431 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-09 04:57:09,723 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 04:57:13,145 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 04:57:13,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-09 04:57:13,152 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 05:01:29,095 (trainer:732) INFO: 29epoch:train:8301-8400batch: iter_time=1.321, forward_time=0.146, loss_ctc=77.823, loss_att=61.892, acc=0.697, loss=66.671, backward_time=1.041, grad_norm=131.315, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.701e-05, train_time=6.575 +[gpub005:0/64] 2023-07-09 05:03:46,371 (trainer:732) INFO: 29epoch:train:8401-8500batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=62.357, loss_att=49.168, acc=0.702, loss=53.124, backward_time=1.028, grad_norm=114.769, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.700e-05, train_time=2.745 +[gpub005:0/64] 2023-07-09 05:06:02,208 (trainer:732) INFO: 29epoch:train:8501-8600batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=76.933, loss_att=55.223, acc=0.716, loss=61.736, backward_time=1.029, grad_norm=107.873, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.699e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 05:08:17,804 (trainer:732) INFO: 29epoch:train:8601-8700batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=75.982, loss_att=45.898, acc=0.712, loss=54.923, backward_time=1.026, grad_norm=112.216, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.698e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 05:10:33,845 (trainer:732) INFO: 29epoch:train:8701-8800batch: iter_time=1.255e-04, forward_time=0.147, loss_ctc=74.836, loss_att=57.671, acc=0.698, loss=62.820, backward_time=1.029, grad_norm=99.814, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.696e-05, train_time=2.721 +[gpub005:0/64] 2023-07-09 05:12:49,571 (trainer:732) INFO: 29epoch:train:8801-8900batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=72.233, loss_att=54.973, acc=0.705, loss=60.151, backward_time=1.027, grad_norm=95.784, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.695e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 05:15:05,314 (trainer:732) INFO: 29epoch:train:8901-9000batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=67.982, loss_att=52.959, acc=0.698, loss=57.466, backward_time=1.026, grad_norm=94.407, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.694e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 05:17:20,925 (trainer:732) INFO: 29epoch:train:9001-9100batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=63.000, loss_att=48.975, 
acc=0.703, loss=53.183, backward_time=1.027, grad_norm=93.871, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.693e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 05:18:51,733 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-09 05:19:10,033 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 05:19:13,693 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 05:19:13,693 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-09 05:19:13,699 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 05:22:38,222 (trainer:732) INFO: 29epoch:train:9101-9200batch: iter_time=1.288, forward_time=0.145, loss_ctc=63.360, loss_att=48.019, acc=0.710, loss=52.621, backward_time=1.038, grad_norm=99.747, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.692e-05, train_time=6.346 +[gpub005:0/64] 2023-07-09 05:24:58,949 (trainer:732) INFO: 29epoch:train:9201-9300batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=62.760, loss_att=47.716, acc=0.709, loss=52.229, backward_time=1.046, grad_norm=108.849, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.690e-05, train_time=2.814 +[gpub005:0/64] 2023-07-09 05:27:21,659 (trainer:732) INFO: 29epoch:train:9301-9400batch: iter_time=1.041e-04, forward_time=0.146, loss_ctc=74.976, loss_att=52.987, acc=0.714, loss=59.583, backward_time=1.067, grad_norm=117.382, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.689e-05, train_time=2.854 +[gpub005:0/64] 2023-07-09 05:29:40,539 (trainer:732) INFO: 29epoch:train:9401-9500batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=76.575, loss_att=47.980, acc=0.712, loss=56.559, backward_time=1.030, grad_norm=107.961, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.688e-05, train_time=2.777 +[gpub005:0/64] 2023-07-09 05:31:56,436 (trainer:732) INFO: 29epoch:train:9501-9600batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=70.340, loss_att=56.056, acc=0.697, loss=60.341, backward_time=1.027, grad_norm=106.273, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.687e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 05:34:12,843 (trainer:732) INFO: 29epoch:train:9601-9700batch: iter_time=1.046e-04, forward_time=0.146, loss_ctc=71.444, loss_att=54.036, acc=0.701, loss=59.259, backward_time=1.028, grad_norm=99.944, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.686e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 05:36:29,650 (trainer:732) INFO: 29epoch:train:9701-9800batch: iter_time=1.158e-04, forward_time=0.152, loss_ctc=68.773, loss_att=52.287, acc=0.698, loss=57.233, backward_time=1.027, grad_norm=110.662, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.684e-05, train_time=2.736 +[gpub005:0/64] 2023-07-09 05:38:48,684 
(trainer:732) INFO: 29epoch:train:9801-9900batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=64.624, loss_att=52.273, acc=0.696, loss=55.979, backward_time=1.028, grad_norm=103.082, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.683e-05, train_time=2.780 +[gpub005:0/64] 2023-07-09 05:41:10,992 (trainer:732) INFO: 29epoch:train:9901-10000batch: iter_time=1.091e-04, forward_time=0.145, loss_ctc=67.638, loss_att=54.649, acc=0.699, loss=58.546, backward_time=1.032, grad_norm=92.414, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.682e-05, train_time=2.846 +[gpub005:0/64] 2023-07-09 05:55:00,842 (trainer:338) INFO: 29epoch results: [train] iter_time=0.204, forward_time=0.147, loss_ctc=70.995, loss_att=53.088, acc=0.704, loss=58.460, backward_time=1.033, grad_norm=107.676, clip=100.000, loss_scale=2.515e+25, optim_step_time=0.183, optim0_lr0=6.742e-05, train_time=3.358, time=4 hours, 40 minutes and 6.74 seconds, total_count=260000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=48.134, cer_ctc=0.273, loss_att=39.724, acc=0.683, cer=0.360, wer=0.992, loss=42.247, time=7 minutes and 46.79 seconds, total_count=26818, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 49.35 seconds, total_count=0, gpu_max_cached_mem_GB=38.234 +[gpub005:0/64] 2023-07-09 05:55:19,622 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub005:0/64] 2023-07-09 05:55:19,855 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/18epoch.pth +[gpub005:0/64] 2023-07-09 05:55:19,907 (trainer:272) INFO: 30/30epoch started. Estimated time to finish: 4 hours, 49 minutes and 24.9 seconds +[gpub005:0/64] 2023-07-09 05:55:21,146 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub005:0/64] 2023-07-09 05:55:40,193 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 05:55:43,757 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 05:55:43,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-09 05:55:43,848 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 06:02:34,500 (trainer:732) INFO: 30epoch:train:1-100batch: iter_time=2.918, forward_time=0.176, loss_ctc=81.654, loss_att=67.808, acc=0.681, loss=71.962, backward_time=1.043, grad_norm=106.248, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.185, optim0_lr0=6.681e-05, train_time=8.680 +[gpub005:0/64] 2023-07-09 06:04:53,428 (trainer:732) INFO: 30epoch:train:101-200batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=70.159, loss_att=56.442, acc=0.698, loss=60.557, backward_time=1.033, grad_norm=116.749, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.680e-05, train_time=2.778 +[gpub005:0/64] 2023-07-09 06:07:19,263 (trainer:732) INFO: 30epoch:train:201-300batch: iter_time=1.039e-04, forward_time=0.144, loss_ctc=75.061, loss_att=51.507, acc=0.692, loss=58.573, backward_time=1.042, grad_norm=111.019, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.679e-05, train_time=2.916 +[gpub005:0/64] 2023-07-09 06:09:35,049 (trainer:732) INFO: 30epoch:train:301-400batch: iter_time=1.042e-04, forward_time=0.144, loss_ctc=69.663, loss_att=53.432, acc=0.703, loss=58.301, backward_time=1.029, grad_norm=130.261, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.677e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 06:11:56,677 (trainer:732) INFO: 30epoch:train:401-500batch: iter_time=1.208e-04, forward_time=0.144, loss_ctc=79.996, loss_att=60.102, acc=0.682, loss=66.070, backward_time=1.054, grad_norm=137.978, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.676e-05, train_time=2.832 +[gpub005:0/64] 2023-07-09 06:14:12,035 (trainer:732) INFO: 30epoch:train:501-600batch: iter_time=1.345e-04, forward_time=0.144, loss_ctc=75.063, loss_att=53.414, acc=0.695, loss=59.909, backward_time=1.026, grad_norm=97.590, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.675e-05, train_time=2.707 +[gpub005:0/64] 2023-07-09 06:16:30,642 (trainer:732) INFO: 30epoch:train:601-700batch: iter_time=1.235e-04, forward_time=0.145, loss_ctc=72.761, loss_att=55.961, acc=0.699, loss=61.001, backward_time=1.029, grad_norm=115.744, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.674e-05, train_time=2.772 +[gpub005:0/64] 2023-07-09 06:18:50,359 (trainer:732) INFO: 30epoch:train:701-800batch: iter_time=1.281e-04, forward_time=0.145, loss_ctc=78.518, loss_att=61.887, acc=0.700, loss=66.876, backward_time=1.035, grad_norm=107.789, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.673e-05, 
train_time=2.794 +[gpub005:0/64] 2023-07-09 06:19:41,326 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub005:0/64] 2023-07-09 06:19:59,097 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 06:20:02,459 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 06:20:02,459 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub005:0/64] 2023-07-09 06:20:02,465 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 06:24:30,746 (trainer:732) INFO: 30epoch:train:801-900batch: iter_time=1.428, forward_time=0.152, loss_ctc=78.646, loss_att=64.512, acc=0.695, loss=68.752, backward_time=1.044, grad_norm=116.800, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.671e-05, train_time=6.808 +[gpub005:0/64] 2023-07-09 06:26:47,705 (trainer:732) INFO: 30epoch:train:901-1000batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=70.264, loss_att=56.691, acc=0.706, loss=60.763, backward_time=1.030, grad_norm=100.914, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.670e-05, train_time=2.739 +[gpub005:0/64] 2023-07-09 06:29:04,083 (trainer:732) INFO: 30epoch:train:1001-1100batch: iter_time=1.106e-04, forward_time=0.145, loss_ctc=72.147, loss_att=55.265, acc=0.704, loss=60.330, backward_time=1.028, grad_norm=115.205, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.669e-05, train_time=2.727 +[gpub005:0/64] 2023-07-09 06:31:19,765 (trainer:732) INFO: 30epoch:train:1101-1200batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=71.106, loss_att=51.456, acc=0.709, loss=57.351, backward_time=1.027, grad_norm=110.706, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.668e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 06:33:35,674 (trainer:732) INFO: 30epoch:train:1201-1300batch: iter_time=1.348e-04, forward_time=0.145, loss_ctc=73.058, loss_att=55.161, acc=0.705, loss=60.531, backward_time=1.028, grad_norm=104.330, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.667e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 06:35:51,423 (trainer:732) INFO: 30epoch:train:1301-1400batch: iter_time=1.195e-04, forward_time=0.146, loss_ctc=74.348, loss_att=54.096, acc=0.690, loss=60.172, backward_time=1.027, grad_norm=101.529, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.665e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 06:38:07,358 (trainer:732) INFO: 30epoch:train:1401-1500batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=66.464, loss_att=52.839, acc=0.721, loss=56.927, backward_time=1.028, grad_norm=90.007, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.664e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 06:40:22,816 (trainer:732) INFO: 30epoch:train:1501-1600batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=82.214, loss_att=60.679, acc=0.706, loss=67.140, 
backward_time=1.026, grad_norm=109.111, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.663e-05, train_time=2.709 +[gpub005:0/64] 2023-07-09 06:41:54,164 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-09 06:42:12,754 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 06:42:16,188 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 06:42:16,188 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-09 06:42:16,194 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 06:46:07,824 (trainer:732) INFO: 30epoch:train:1601-1700batch: iter_time=1.414, forward_time=0.155, loss_ctc=72.256, loss_att=57.905, acc=0.707, loss=62.210, backward_time=1.043, grad_norm=109.824, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.184, optim0_lr0=6.662e-05, train_time=6.900 +[gpub005:0/64] 2023-07-09 06:48:25,273 (trainer:732) INFO: 30epoch:train:1701-1800batch: iter_time=1.219e-04, forward_time=0.146, loss_ctc=71.125, loss_att=56.929, acc=0.694, loss=61.188, backward_time=1.037, grad_norm=93.078, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.661e-05, train_time=2.749 +[gpub005:0/64] 2023-07-09 06:50:41,031 (trainer:732) INFO: 30epoch:train:1801-1900batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=68.578, loss_att=54.880, acc=0.697, loss=58.990, backward_time=1.028, grad_norm=105.880, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.660e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 06:52:57,455 (trainer:732) INFO: 30epoch:train:1901-2000batch: iter_time=0.001, forward_time=0.148, loss_ctc=71.582, loss_att=53.088, acc=0.699, loss=58.636, backward_time=1.028, grad_norm=122.721, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.658e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 06:55:13,258 (trainer:732) INFO: 30epoch:train:2001-2100batch: iter_time=1.386e-04, forward_time=0.146, loss_ctc=73.218, loss_att=56.105, acc=0.697, loss=61.239, backward_time=1.028, grad_norm=100.719, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.657e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 06:57:30,072 (trainer:732) INFO: 30epoch:train:2101-2200batch: iter_time=6.216e-04, forward_time=0.150, loss_ctc=74.433, loss_att=53.049, acc=0.690, loss=59.464, backward_time=1.031, grad_norm=103.823, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.656e-05, train_time=2.734 +[gpub005:0/64] 2023-07-09 06:59:47,604 (trainer:732) INFO: 30epoch:train:2201-2300batch: iter_time=6.988e-04, forward_time=0.161, loss_ctc=68.122, loss_att=55.081, acc=0.701, loss=58.993, backward_time=1.028, grad_norm=102.516, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.184, optim0_lr0=6.655e-05, train_time=2.752 +[gpub005:0/64] 2023-07-09 07:02:03,279 (trainer:732) INFO: 
30epoch:train:2301-2400batch: iter_time=9.358e-04, forward_time=0.146, loss_ctc=81.052, loss_att=59.227, acc=0.699, loss=65.775, backward_time=1.027, grad_norm=123.594, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.654e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 07:04:18,965 (trainer:732) INFO: 30epoch:train:2401-2500batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=70.548, loss_att=54.052, acc=0.711, loss=59.001, backward_time=1.027, grad_norm=110.402, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.652e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 07:04:24,292 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-09 07:04:42,292 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 07:04:45,699 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 07:04:45,699 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-09 07:04:45,777 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 07:11:21,417 (trainer:732) INFO: 30epoch:train:2501-2600batch: iter_time=1.621, forward_time=0.147, loss_ctc=77.350, loss_att=59.850, acc=0.713, loss=65.100, backward_time=1.046, grad_norm=102.810, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.651e-05, train_time=8.449 +[gpub005:0/64] 2023-07-09 07:13:37,716 (trainer:732) INFO: 30epoch:train:2601-2700batch: iter_time=1.199e-04, forward_time=0.147, loss_ctc=64.507, loss_att=51.570, acc=0.715, loss=55.451, backward_time=1.029, grad_norm=104.183, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.650e-05, train_time=2.726 +[gpub005:0/64] 2023-07-09 07:15:54,008 (trainer:732) INFO: 30epoch:train:2701-2800batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=75.266, loss_att=54.346, acc=0.696, loss=60.622, backward_time=1.032, grad_norm=108.024, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.649e-05, train_time=2.726 +[gpub005:0/64] 2023-07-09 07:18:09,966 (trainer:732) INFO: 30epoch:train:2801-2900batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=68.932, loss_att=47.991, acc=0.727, loss=54.273, backward_time=1.030, grad_norm=91.719, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.648e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 07:20:25,937 (trainer:732) INFO: 30epoch:train:2901-3000batch: iter_time=1.119e-04, forward_time=0.147, loss_ctc=76.108, loss_att=56.964, acc=0.689, loss=62.707, backward_time=1.030, grad_norm=111.677, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.647e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 07:22:41,672 (trainer:732) INFO: 30epoch:train:3001-3100batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=68.551, loss_att=47.543, acc=0.711, loss=53.845, backward_time=1.028, grad_norm=94.809, clip=100.000, loss_scale=7.737e+25, 
optim_step_time=0.183, optim0_lr0=6.645e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 07:24:57,391 (trainer:732) INFO: 30epoch:train:3101-3200batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=76.505, loss_att=59.918, acc=0.709, loss=64.894, backward_time=1.028, grad_norm=102.831, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.644e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 07:27:13,353 (trainer:732) INFO: 30epoch:train:3201-3300batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=74.867, loss_att=58.218, acc=0.713, loss=63.213, backward_time=1.029, grad_norm=165.724, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.643e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 07:28:00,817 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-09 07:28:18,947 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 07:28:22,591 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 07:28:22,591 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub005:0/64] 2023-07-09 07:28:22,597 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 07:34:22,922 (trainer:732) INFO: 30epoch:train:3301-3400batch: iter_time=1.234, forward_time=0.170, loss_ctc=80.726, loss_att=62.733, acc=0.711, loss=68.131, backward_time=1.040, grad_norm=125.085, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.185, optim0_lr0=6.642e-05, train_time=8.591 +[gpub005:0/64] 2023-07-09 07:36:40,744 (trainer:732) INFO: 30epoch:train:3401-3500batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=69.925, loss_att=55.881, acc=0.710, loss=60.094, backward_time=1.036, grad_norm=119.662, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.641e-05, train_time=2.756 +[gpub005:0/64] 2023-07-09 07:38:56,449 (trainer:732) INFO: 30epoch:train:3501-3600batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=69.586, loss_att=53.200, acc=0.709, loss=58.116, backward_time=1.029, grad_norm=123.761, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.640e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 07:41:13,578 (trainer:732) INFO: 30epoch:train:3601-3700batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=71.179, loss_att=51.608, acc=0.710, loss=57.480, backward_time=1.027, grad_norm=100.747, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.638e-05, train_time=2.742 +[gpub005:0/64] 2023-07-09 07:43:30,037 (trainer:732) INFO: 30epoch:train:3701-3800batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=71.628, loss_att=52.400, acc=0.715, loss=58.169, backward_time=1.029, grad_norm=96.842, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.637e-05, train_time=2.729 +[gpub005:0/64] 2023-07-09 07:45:45,881 (trainer:732) INFO: 30epoch:train:3801-3900batch: iter_time=1.200e-04, forward_time=0.147, 
loss_ctc=72.724, loss_att=52.528, acc=0.693, loss=58.587, backward_time=1.028, grad_norm=108.896, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.636e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 07:48:11,019 (trainer:732) INFO: 30epoch:train:3901-4000batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=70.380, loss_att=54.615, acc=0.715, loss=59.344, backward_time=1.089, grad_norm=115.084, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.635e-05, train_time=2.903 +[gpub005:0/64] 2023-07-09 07:50:29,931 (trainer:732) INFO: 30epoch:train:4001-4100batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=79.491, loss_att=57.454, acc=0.715, loss=64.065, backward_time=1.034, grad_norm=115.797, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.634e-05, train_time=2.778 +[gpub005:0/64] 2023-07-09 07:52:17,245 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-09 07:52:35,320 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 07:52:38,875 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 07:52:38,875 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-09 07:52:38,881 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 07:57:13,828 (trainer:732) INFO: 30epoch:train:4101-4200batch: iter_time=1.808, forward_time=0.172, loss_ctc=73.203, loss_att=63.564, acc=0.703, loss=66.455, backward_time=1.059, grad_norm=135.634, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.184, optim0_lr0=6.633e-05, train_time=8.078 +[gpub005:0/64] 2023-07-09 07:59:30,354 (trainer:732) INFO: 30epoch:train:4201-4300batch: iter_time=1.169e-04, forward_time=0.148, loss_ctc=77.444, loss_att=60.785, acc=0.709, loss=65.783, backward_time=1.032, grad_norm=116.536, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.631e-05, train_time=2.730 +[gpub005:0/64] 2023-07-09 08:01:50,592 (trainer:732) INFO: 30epoch:train:4301-4400batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=64.714, loss_att=52.178, acc=0.713, loss=55.939, backward_time=1.030, grad_norm=93.861, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.630e-05, train_time=2.805 +[gpub005:0/64] 2023-07-09 08:04:18,788 (trainer:732) INFO: 30epoch:train:4401-4500batch: iter_time=1.166e-04, forward_time=0.147, loss_ctc=74.033, loss_att=54.299, acc=0.707, loss=60.219, backward_time=1.056, grad_norm=102.941, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.629e-05, train_time=2.964 +[gpub005:0/64] 2023-07-09 08:06:38,566 (trainer:732) INFO: 30epoch:train:4501-4600batch: iter_time=1.187e-04, forward_time=0.144, loss_ctc=68.971, loss_att=48.825, acc=0.724, loss=54.868, backward_time=1.033, grad_norm=97.902, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.628e-05, train_time=2.795 
+[gpub005:0/64] 2023-07-09 08:09:01,583 (trainer:732) INFO: 30epoch:train:4601-4700batch: iter_time=1.106e-04, forward_time=0.144, loss_ctc=74.694, loss_att=55.882, acc=0.686, loss=61.525, backward_time=1.036, grad_norm=154.070, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.627e-05, train_time=2.860 +[gpub005:0/64] 2023-07-09 08:11:17,207 (trainer:732) INFO: 30epoch:train:4701-4800batch: iter_time=1.118e-04, forward_time=0.145, loss_ctc=69.389, loss_att=49.217, acc=0.709, loss=55.269, backward_time=1.024, grad_norm=102.401, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.626e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 08:13:34,842 (trainer:732) INFO: 30epoch:train:4801-4900batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=76.215, loss_att=58.985, acc=0.716, loss=64.154, backward_time=1.029, grad_norm=102.975, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.624e-05, train_time=2.752 +[gpub005:0/64] 2023-07-09 08:15:50,733 (trainer:732) INFO: 30epoch:train:4901-5000batch: iter_time=1.189e-04, forward_time=0.147, loss_ctc=73.406, loss_att=57.659, acc=0.715, loss=62.383, backward_time=1.029, grad_norm=107.677, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.623e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 08:15:55,286 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-09 08:16:13,554 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 08:16:17,414 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 08:16:17,414 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-09 08:16:17,420 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 08:21:39,635 (trainer:732) INFO: 30epoch:train:5001-5100batch: iter_time=1.348, forward_time=0.148, loss_ctc=79.858, loss_att=65.978, acc=0.708, loss=70.142, backward_time=1.055, grad_norm=110.794, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.622e-05, train_time=6.978 +[gpub005:0/64] 2023-07-09 08:24:02,275 (trainer:732) INFO: 30epoch:train:5101-5200batch: iter_time=1.025e-04, forward_time=0.147, loss_ctc=68.121, loss_att=55.090, acc=0.713, loss=58.999, backward_time=1.036, grad_norm=97.120, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.621e-05, train_time=2.853 +[gpub005:0/64] 2023-07-09 08:26:17,891 (trainer:732) INFO: 30epoch:train:5201-5300batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=67.439, loss_att=45.809, acc=0.718, loss=52.298, backward_time=1.026, grad_norm=94.714, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.620e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 08:28:33,694 (trainer:732) INFO: 30epoch:train:5301-5400batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=65.888, loss_att=50.949, acc=0.715, loss=55.431, 
backward_time=1.027, grad_norm=105.113, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.619e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 08:30:49,534 (trainer:732) INFO: 30epoch:train:5401-5500batch: iter_time=1.314e-04, forward_time=0.146, loss_ctc=75.721, loss_att=55.905, acc=0.700, loss=61.850, backward_time=1.027, grad_norm=107.472, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.617e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 08:33:06,008 (trainer:732) INFO: 30epoch:train:5501-5600batch: iter_time=1.142e-04, forward_time=0.147, loss_ctc=72.755, loss_att=50.694, acc=0.711, loss=57.312, backward_time=1.028, grad_norm=108.338, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.616e-05, train_time=2.729 +[gpub005:0/64] 2023-07-09 08:35:21,994 (trainer:732) INFO: 30epoch:train:5601-5700batch: iter_time=9.944e-05, forward_time=0.147, loss_ctc=71.018, loss_att=55.206, acc=0.714, loss=59.950, backward_time=1.029, grad_norm=106.123, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.615e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 08:37:37,976 (trainer:732) INFO: 30epoch:train:5701-5800batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=75.038, loss_att=60.040, acc=0.717, loss=64.539, backward_time=1.029, grad_norm=119.425, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.614e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 08:38:26,516 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub005:0/64] 2023-07-09 08:38:44,159 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 08:38:47,519 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 08:38:47,520 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-09 08:38:47,581 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 08:44:06,011 (trainer:732) INFO: 30epoch:train:5801-5900batch: iter_time=1.453, forward_time=0.147, loss_ctc=74.094, loss_att=57.930, acc=0.715, loss=62.779, backward_time=1.047, grad_norm=106.855, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.613e-05, train_time=7.760 +[gpub005:0/64] 2023-07-09 08:46:23,423 (trainer:732) INFO: 30epoch:train:5901-6000batch: iter_time=1.069e-04, forward_time=0.146, loss_ctc=66.844, loss_att=52.858, acc=0.715, loss=57.054, backward_time=1.032, grad_norm=103.900, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.612e-05, train_time=2.748 +[gpub005:0/64] 2023-07-09 08:48:41,143 (trainer:732) INFO: 30epoch:train:6001-6100batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=65.513, loss_att=47.511, acc=0.713, loss=52.911, backward_time=1.033, grad_norm=110.804, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.610e-05, train_time=2.754 +[gpub005:0/64] 2023-07-09 08:50:57,547 (trainer:732) INFO: 
30epoch:train:6101-6200batch: iter_time=1.083e-04, forward_time=0.146, loss_ctc=66.188, loss_att=50.999, acc=0.714, loss=55.556, backward_time=1.029, grad_norm=98.669, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.609e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 08:53:13,408 (trainer:732) INFO: 30epoch:train:6201-6300batch: iter_time=1.085e-04, forward_time=0.146, loss_ctc=75.870, loss_att=56.326, acc=0.701, loss=62.189, backward_time=1.029, grad_norm=108.691, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.184, optim0_lr0=6.608e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 08:55:30,067 (trainer:732) INFO: 30epoch:train:6301-6400batch: iter_time=1.029e-04, forward_time=0.146, loss_ctc=72.916, loss_att=50.933, acc=0.708, loss=57.528, backward_time=1.028, grad_norm=107.518, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.607e-05, train_time=2.733 +[gpub005:0/64] 2023-07-09 08:57:48,148 (trainer:732) INFO: 30epoch:train:6401-6500batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=71.883, loss_att=55.390, acc=0.712, loss=60.338, backward_time=1.029, grad_norm=98.625, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.606e-05, train_time=2.761 +[gpub005:0/64] 2023-07-09 09:00:03,990 (trainer:732) INFO: 30epoch:train:6501-6600batch: iter_time=1.080e-04, forward_time=0.146, loss_ctc=74.721, loss_att=57.672, acc=0.714, loss=62.787, backward_time=1.029, grad_norm=99.654, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.605e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 09:01:37,401 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-09 09:01:55,924 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 09:01:59,360 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 09:01:59,360 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-09 09:01:59,366 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 09:05:31,034 (trainer:732) INFO: 30epoch:train:6601-6700batch: iter_time=1.189, forward_time=0.188, loss_ctc=75.246, loss_att=59.358, acc=0.716, loss=64.124, backward_time=1.048, grad_norm=107.046, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.185, optim0_lr0=6.604e-05, train_time=6.541 +[gpub005:0/64] 2023-07-09 09:07:47,651 (trainer:732) INFO: 30epoch:train:6701-6800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=76.815, loss_att=61.302, acc=0.695, loss=65.956, backward_time=1.029, grad_norm=110.285, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.602e-05, train_time=2.732 +[gpub005:0/64] 2023-07-09 09:10:03,733 (trainer:732) INFO: 30epoch:train:6801-6900batch: iter_time=1.232e-04, forward_time=0.147, loss_ctc=63.395, loss_att=53.738, acc=0.705, loss=56.635, backward_time=1.029, grad_norm=93.495, clip=100.000, loss_scale=1.547e+26, 
optim_step_time=0.183, optim0_lr0=6.601e-05, train_time=2.721 +[gpub005:0/64] 2023-07-09 09:12:20,215 (trainer:732) INFO: 30epoch:train:6901-7000batch: iter_time=1.293e-04, forward_time=0.146, loss_ctc=73.005, loss_att=53.910, acc=0.703, loss=59.639, backward_time=1.029, grad_norm=116.643, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.600e-05, train_time=2.729 +[gpub005:0/64] 2023-07-09 09:14:35,852 (trainer:732) INFO: 30epoch:train:7001-7100batch: iter_time=1.414e-04, forward_time=0.146, loss_ctc=68.606, loss_att=50.078, acc=0.714, loss=55.637, backward_time=1.028, grad_norm=91.992, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.599e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 09:16:56,392 (trainer:732) INFO: 30epoch:train:7101-7200batch: iter_time=1.186e-04, forward_time=0.174, loss_ctc=72.330, loss_att=55.852, acc=0.689, loss=60.795, backward_time=1.029, grad_norm=103.971, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.184, optim0_lr0=6.598e-05, train_time=2.811 +[gpub005:0/64] 2023-07-09 09:19:14,156 (trainer:732) INFO: 30epoch:train:7201-7300batch: iter_time=1.131e-04, forward_time=0.161, loss_ctc=66.916, loss_att=49.747, acc=0.706, loss=54.898, backward_time=1.027, grad_norm=91.907, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.597e-05, train_time=2.754 +[gpub005:0/64] 2023-07-09 09:21:33,212 (trainer:732) INFO: 30epoch:train:7301-7400batch: iter_time=1.143e-04, forward_time=0.147, loss_ctc=74.799, loss_att=59.157, acc=0.709, loss=63.849, backward_time=1.031, grad_norm=114.878, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.595e-05, train_time=2.782 +[gpub005:0/64] 2023-07-09 09:23:48,998 (trainer:732) INFO: 30epoch:train:7401-7500batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=71.607, loss_att=57.464, acc=0.706, loss=61.707, backward_time=1.027, grad_norm=123.667, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.594e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 09:24:03,715 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-09 09:24:22,085 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 09:24:25,849 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 09:24:25,849 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub005:0/64] 2023-07-09 09:24:25,855 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 09:30:03,490 (trainer:732) INFO: 30epoch:train:7501-7600batch: iter_time=2.212, forward_time=0.145, loss_ctc=79.143, loss_att=65.331, acc=0.698, loss=69.474, backward_time=1.041, grad_norm=116.935, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.593e-05, train_time=7.490 +[gpub005:0/64] 2023-07-09 09:32:20,507 (trainer:732) INFO: 30epoch:train:7601-7700batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=68.209, loss_att=54.927, acc=0.710, loss=58.912, backward_time=1.029, grad_norm=98.114, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.592e-05, train_time=2.740 +[gpub005:0/64] 2023-07-09 09:34:37,364 (trainer:732) INFO: 30epoch:train:7701-7800batch: iter_time=1.241e-04, forward_time=0.145, loss_ctc=67.320, loss_att=47.178, acc=0.705, loss=53.221, backward_time=1.028, grad_norm=107.580, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.591e-05, train_time=2.737 +[gpub005:0/64] 2023-07-09 09:36:52,907 (trainer:732) INFO: 30epoch:train:7801-7900batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=65.382, loss_att=51.048, acc=0.714, loss=55.348, backward_time=1.025, grad_norm=94.448, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.590e-05, train_time=2.711 +[gpub005:0/64] 2023-07-09 09:39:08,490 (trainer:732) INFO: 30epoch:train:7901-8000batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=74.717, loss_att=57.283, acc=0.693, loss=62.513, backward_time=1.026, grad_norm=109.439, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.589e-05, train_time=2.711 +[gpub005:0/64] 2023-07-09 09:41:24,030 (trainer:732) INFO: 30epoch:train:8001-8100batch: iter_time=1.290e-04, forward_time=0.145, loss_ctc=72.670, loss_att=51.262, acc=0.705, loss=57.684, backward_time=1.025, grad_norm=114.707, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.587e-05, train_time=2.711 +[gpub005:0/64] 2023-07-09 09:43:40,044 (trainer:732) INFO: 30epoch:train:8101-8200batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=71.182, loss_att=53.993, acc=0.710, loss=59.150, backward_time=1.030, grad_norm=103.073, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.586e-05, train_time=2.720 +[gpub005:0/64] 2023-07-09 09:45:55,894 (trainer:732) INFO: 30epoch:train:8201-8300batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=75.740, loss_att=60.546, acc=0.707, loss=65.104, backward_time=1.028, grad_norm=105.365, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, 
optim0_lr0=6.585e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 09:46:42,025 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-09 09:47:00,609 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 09:47:04,055 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 09:47:04,055 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-09 09:47:04,061 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 09:52:57,152 (trainer:732) INFO: 30epoch:train:8301-8400batch: iter_time=1.228, forward_time=0.146, loss_ctc=77.226, loss_att=61.263, acc=0.697, loss=66.052, backward_time=1.037, grad_norm=105.579, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.584e-05, train_time=8.425 +[gpub005:0/64] 2023-07-09 09:55:13,828 (trainer:732) INFO: 30epoch:train:8401-8500batch: iter_time=1.160e-04, forward_time=0.145, loss_ctc=69.146, loss_att=55.329, acc=0.709, loss=59.474, backward_time=1.029, grad_norm=107.471, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.583e-05, train_time=2.733 +[gpub005:0/64] 2023-07-09 09:57:29,536 (trainer:732) INFO: 30epoch:train:8501-8600batch: iter_time=1.224e-04, forward_time=0.144, loss_ctc=66.513, loss_att=53.263, acc=0.703, loss=57.238, backward_time=1.025, grad_norm=96.505, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.582e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 09:59:45,955 (trainer:732) INFO: 30epoch:train:8601-8700batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=70.589, loss_att=51.866, acc=0.707, loss=57.483, backward_time=1.029, grad_norm=107.538, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.581e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 10:02:01,664 (trainer:732) INFO: 30epoch:train:8701-8800batch: iter_time=1.318e-04, forward_time=0.146, loss_ctc=70.315, loss_att=53.128, acc=0.709, loss=58.284, backward_time=1.029, grad_norm=101.284, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.579e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 10:04:17,103 (trainer:732) INFO: 30epoch:train:8801-8900batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=75.096, loss_att=52.904, acc=0.696, loss=59.561, backward_time=1.027, grad_norm=104.484, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.578e-05, train_time=2.709 +[gpub005:0/64] 2023-07-09 10:06:32,753 (trainer:732) INFO: 30epoch:train:8901-9000batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=67.620, loss_att=53.536, acc=0.711, loss=57.762, backward_time=1.028, grad_norm=97.983, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.577e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 10:08:48,415 (trainer:732) INFO: 30epoch:train:9001-9100batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=79.714, loss_att=58.394, 
acc=0.703, loss=64.790, backward_time=1.029, grad_norm=104.995, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.576e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 10:10:19,724 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-09 10:10:37,937 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 10:10:41,460 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 10:10:41,461 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-09 10:10:41,467 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 10:13:44,726 (trainer:732) INFO: 30epoch:train:9101-9200batch: iter_time=1.276, forward_time=0.172, loss_ctc=72.067, loss_att=56.062, acc=0.707, loss=60.863, backward_time=1.041, grad_norm=115.598, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.184, optim0_lr0=6.575e-05, train_time=5.926 +[gpub005:0/64] 2023-07-09 10:16:01,376 (trainer:732) INFO: 30epoch:train:9201-9300batch: iter_time=1.195e-04, forward_time=0.145, loss_ctc=70.122, loss_att=56.197, acc=0.700, loss=60.374, backward_time=1.030, grad_norm=113.875, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.574e-05, train_time=2.733 +[gpub005:0/64] 2023-07-09 10:18:17,221 (trainer:732) INFO: 30epoch:train:9301-9400batch: iter_time=1.348e-04, forward_time=0.145, loss_ctc=66.656, loss_att=53.634, acc=0.703, loss=57.541, backward_time=1.027, grad_norm=107.518, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.573e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 10:20:33,434 (trainer:732) INFO: 30epoch:train:9401-9500batch: iter_time=1.426e-04, forward_time=0.146, loss_ctc=70.982, loss_att=51.945, acc=0.706, loss=57.656, backward_time=1.028, grad_norm=103.989, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.572e-05, train_time=2.724 +[gpub005:0/64] 2023-07-09 10:22:49,713 (trainer:732) INFO: 30epoch:train:9501-9600batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=71.163, loss_att=53.531, acc=0.707, loss=58.820, backward_time=1.028, grad_norm=106.954, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.570e-05, train_time=2.725 +[gpub005:0/64] 2023-07-09 10:25:05,526 (trainer:732) INFO: 30epoch:train:9601-9700batch: iter_time=1.256e-04, forward_time=0.145, loss_ctc=72.003, loss_att=50.687, acc=0.699, loss=57.082, backward_time=1.026, grad_norm=95.981, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.569e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 10:27:21,391 (trainer:732) INFO: 30epoch:train:9701-9800batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=67.314, loss_att=53.367, acc=0.709, loss=57.551, backward_time=1.028, grad_norm=101.436, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.568e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 
10:29:37,738 (trainer:732) INFO: 30epoch:train:9801-9900batch: iter_time=1.103e-04, forward_time=0.144, loss_ctc=79.876, loss_att=57.750, acc=0.705, loss=64.387, backward_time=1.026, grad_norm=108.714, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.567e-05, train_time=2.727 +[gpub005:0/64] 2023-07-09 10:31:53,636 (trainer:732) INFO: 30epoch:train:9901-10000batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=69.642, loss_att=52.891, acc=0.711, loss=57.916, backward_time=1.026, grad_norm=127.204, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.566e-05, train_time=2.718 +gpub030:2531971:2532059 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub018:1650755:1650841 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub012:1607819:1607909 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub014:1495254:1495339 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub014:1495256:1495336 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub030:2531969:2532058 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub030:2531970:2532060 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub096:1645785:1645863 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub097:1705871:1705965 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub013:1694053:1694137 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub013:1694056:1694139 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub013:1694055:1694136 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub018:1650753:1650840 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub098:1875740:1875815 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub098:1875738:1875816 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub098:1875741:1875817 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub012:1607821:1607907 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub012:1607818:1607910 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub014:1495255:1495338 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub014:1495257:1495337 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub097:1705870:1705966 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub030:2531972:2532057 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub018:1650756:1650842 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub096:1645786:1645864 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub096:1645787:1645865 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub096:1645784:1645866 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub012:1607820:1607908 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub018:1650755:1650755 [2] NCCL INFO comm 0x513374c0 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub039:2093177:2093253 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub039:2093176:2093252 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub018:1650754:1650839 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub095:2520060:2520144 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub039:2093175:2093254 [0] NCCL INFO [Service thread] 
Connection closed by localRank 0 +gpub097:1705871:1705871 [3] NCCL INFO comm 0x94d2db0 rank 59 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub039:2093178:2093251 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub040:2093693:2093783 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub040:2093691:2093781 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub040:2093692:2093780 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub095:2520059:2520143 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub040:2093690:2093782 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub072:1805521:1805611 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub095:2520061:2520145 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub013:1694056:1694056 [3] NCCL INFO comm 0x8c00090 rank 11 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub098:1875739:1875818 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub095:2520062:2520146 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub072:1805522:1805612 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub097:1705868:1705963 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub097:1705869:1705964 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub072:1805519:1805613 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub005:2408154:2408234 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub072:1805520:1805610 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub098:1875739:1875739 [1] NCCL INFO comm 0x4ffeee90 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub098:1875740:1875740 [2] NCCL INFO comm 0x8c9fbb0 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub030:2531969:2531969 [0] NCCL INFO comm 0xb4f6de0 rank 20 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub013:1694055:1694055 [2] NCCL INFO comm 0xf6b9b10 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub013:1694053:1694053 [0] NCCL INFO comm 0x8c6ae750 rank 8 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub096:1645785:1645785 [1] NCCL INFO comm 0x50f7e840 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub097:1705870:1705870 [2] NCCL INFO comm 0x50f117a0 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub012:1607820:1607820 [2] NCCL INFO comm 0x503f1430 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub098:1875741:1875741 [3] NCCL INFO comm 0x4ecd4ee0 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub072:1805521:1805521 [2] NCCL INFO comm 0x8d829e60 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub096:1645786:1645786 [2] NCCL INFO comm 0x4fe9cb90 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub039:2093177:2093177 [2] NCCL INFO comm 0xa965b10 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub013:1694054:1694138 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub072:1805520:1805520 [1] NCCL INFO comm 0xb6f41780 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub098:1875738:1875738 [0] NCCL INFO comm 0x9e5ca730 rank 60 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub030:2531972:2531972 [3] NCCL INFO comm 0xa2cf1d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub018:1650753:1650753 [0] NCCL INFO comm 0x4f7a1b90 rank 16 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE 
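The 30epoch trainer records above print loss_ctc, loss_att, and a combined loss; the numbers are consistent with a hybrid CTC/attention objective loss = w*loss_ctc + (1-w)*loss_att with w = 0.3, though the weight is inferred from the logged values rather than stated in this log. A minimal sketch (hypothetical helper names) for checking that relation while scanning these records:

import re

# Trainer records in this log print loss_ctc, loss_att, and a combined loss.
# Assumption (inferred from the values, not stated in the log):
# loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3.
RECORD = re.compile(r"loss_ctc=([\d.]+), loss_att=([\d.]+), acc=[\d.]+, loss=([\d.]+)")

def combined_loss_matches(line, w=0.3, tol=5e-3):
    """True if a trainer record's combined loss equals w*loss_ctc + (1-w)*loss_att."""
    m = RECORD.search(line)
    if m is None:
        return False  # not a trainer record
    loss_ctc, loss_att, loss = map(float, m.groups())
    return abs(w * loss_ctc + (1.0 - w) * loss_att - loss) < tol

# Example from the 9201-9300batch record above:
# 0.3 * 70.122 + 0.7 * 56.197 = 60.374, matching the logged loss=60.374.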
+gpub097:1705869:1705869 [1] NCCL INFO comm 0x8e89510 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub018:1650756:1650756 [3] NCCL INFO comm 0x8c504da0 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub012:1607818:1607818 [0] NCCL INFO comm 0xa8f3bc80 rank 4 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub072:1805519:1805519 [0] NCCL INFO comm 0x4fb13ad0 rank 40 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub005:2408152:2408235 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub005:2408153:2408237 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub030:2531970:2531970 [1] NCCL INFO comm 0x8ebc3340 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub030:2531971:2531971 [2] NCCL INFO comm 0x8dd18cd0 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub095:2520059:2520059 [0] NCCL INFO comm 0x15bb1c50 rank 48 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub039:2093175:2093175 [0] NCCL INFO comm 0xa2cab60 rank 24 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub012:1607819:1607819 [1] NCCL INFO comm 0xa4ee840 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub072:1805522:1805522 [3] NCCL INFO comm 0x50740450 rank 43 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub005:2408154:2408154 [3] NCCL INFO comm 0x519a6580 rank 3 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub018:1650754:1650754 [1] NCCL INFO comm 0xa938d420 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub039:2093176:2093176 [1] NCCL INFO comm 0xbcbaabd0 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub095:2520061:2520061 [2] NCCL INFO comm 0x91b7930 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub096:1645787:1645787 [3] NCCL INFO comm 0xb78b6390 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub012:1607821:1607821 [3] NCCL INFO comm 0x516c3430 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub039:2093178:2093178 [3] NCCL INFO comm 0x4fc75960 rank 27 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub005:2408153:2408153 [2] NCCL INFO comm 0x4fab6870 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub084:4052710:4052803 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub040:2093690:2093690 [0] NCCL INFO comm 0xba9dc4d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub014:1495254:1495254 [0] NCCL INFO comm 0x50fe0a80 rank 12 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub084:4052709:4052802 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub084:4052708:4052804 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub084:4052711:4052801 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub096:1645784:1645784 [0] NCCL INFO comm 0xcdcc14f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub014:1495256:1495256 [2] NCCL INFO comm 0x9f383a90 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub005:2408152:2408152 [1] NCCL INFO comm 0x50e7e140 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub040:2093692:2093692 [2] NCCL INFO comm 0x514bd130 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub013:1694054:1694054 [1] NCCL INFO comm 0x5088d590 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub095:2520062:2520062 [3] NCCL INFO comm 0x8c7104c0 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub095:2520060:2520060 [1] NCCL INFO comm 0xb4653490 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE 
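The NCCL "Abort COMPLETE" records in this stretch are the normal end-of-run teardown of the 64-rank communicator; each rank should emit exactly one such record. A short sketch (assuming only the line format visible in this log) that flags ranks whose abort never completed:

import re
import sys

# One "Abort COMPLETE" record is expected per rank at teardown, e.g.
# "... NCCL INFO comm 0x513374c0 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE"
ABORT = re.compile(r"rank (\d+) nranks (\d+) .* - Abort COMPLETE")

def missing_ranks(log_path):
    done, nranks = set(), 0
    with open(log_path) as f:
        for line in f:
            m = ABORT.search(line)
            if m:
                done.add(int(m.group(1)))
                nranks = int(m.group(2))
    return sorted(set(range(nranks)) - done)

if __name__ == "__main__":
    # Usage (hypothetical file name): python check_aborts.py train.1.log
    print(missing_ranks(sys.argv[1]))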
+gpub040:2093691:2093691 [1] NCCL INFO comm 0xb9336880 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub014:1495257:1495257 [3] NCCL INFO comm 0x946a450 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub097:1705868:1705868 [0] NCCL INFO comm 0x4f565ad0 rank 56 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub040:2093693:2093693 [3] NCCL INFO comm 0xbd6eac10 rank 31 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub014:1495255:1495255 [1] NCCL INFO comm 0x515d3c50 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub084:4052708:4052708 [0] NCCL INFO comm 0xb576c9d0 rank 44 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub084:4052710:4052710 [2] NCCL INFO comm 0x4f81fce0 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub084:4052709:4052709 [1] NCCL INFO comm 0xd834420 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub084:4052711:4052711 [3] NCCL INFO comm 0xa5710b50 rank 47 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub041:1527385:1527468 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub041:1527384:1527467 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub041:1527385:1527385 [2] NCCL INFO comm 0x5082e9e0 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub041:1527383:1527470 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub041:1527386:1527469 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub041:1527386:1527386 [3] NCCL INFO comm 0x4f979490 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub041:1527384:1527384 [1] NCCL INFO comm 0x512a04d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub041:1527383:1527383 [0] NCCL INFO comm 0x5103b480 rank 32 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub067:1574057:1574141 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub067:1574055:1574142 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub067:1574056:1574140 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub067:1574057:1574057 [3] NCCL INFO comm 0x8d973650 rank 39 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub067:1574055:1574055 [1] NCCL INFO comm 0x509b90f0 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub067:1574054:1574139 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub067:1574056:1574056 [2] NCCL INFO comm 0xb006d7d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub067:1574054:1574054 [0] NCCL INFO comm 0x4f342150 rank 36 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +[gpub005:0/64] 2023-07-09 10:43:52,593 (trainer:338) INFO: 30epoch results: [train] iter_time=0.191, forward_time=0.148, loss_ctc=72.388, loss_att=55.270, acc=0.706, loss=60.405, backward_time=1.032, grad_norm=108.407, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.623e-05, train_time=3.318, time=4 hours, 36 minutes and 42.57 seconds, total_count=270000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=46.634, cer_ctc=0.267, loss_att=40.617, acc=0.667, cer=0.399, wer=0.998, loss=42.422, time=5 minutes and 56.2 seconds, total_count=27830, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 53.71 seconds, total_count=0, gpu_max_cached_mem_GB=38.234 +[gpub005:0/64] 2023-07-09 10:44:08,080 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub005:0/64] 2023-07-09 10:44:08,120 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": 
exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till30epoch.pth +[gpub005:0/64] 2023-07-09 10:45:00,456 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till30epoch.pth +[gpub005:0/64] 2023-07-09 10:45:39,702 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/22epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/25epoch.pth +[gpub005:0/64] 2023-07-09 10:45:39,703 (trainer:458) INFO: The training was finished at 30 epochs +[gpub005:0/64] 2023-07-09 10:45:39,705 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.pth +[gpub005:0/64] 2023-07-09 10:45:50,816 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.pth +gpub005:2408151:2408236 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub005:2408151:2408151 [0] NCCL INFO comm 0x8dda0850 rank 0 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +# Accounting: begin_time=1688778318 +# Accounting: end_time=1688917563 +# Accounting: time=139245 threads=1 +# Finished at Sun Jul 9 10:46:03 CDT 2023 with status 0 diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log new file mode 100644 index 0000000000000000000000000000000000000000..8db1f95dd1757f8d117cc406623f88fa4234d53f --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log @@ -0,0 +1,5571 @@ +# Running on gpua003.delta.ncsa.illinois.edu +# Started at Wed Jul 5 22:37:23 CDT 2023 +# SLURMD_NODENAME=gpua003 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2132611 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2132611 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpua[003,005,010,025,028-029,031,035,053,055,057,060,074,087,090,098]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA100x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpua[003,005,010,025,028-029,031,035,053,055,057,060,074,087,090,098]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=350544 +# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua003 +# 
SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_409154d5-fd37-4757-b90c-3838c14071d0 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_409154d5-fd37-4757-b90c-3838c14071d0 +[gpua003:0/64] 2023-07-05 22:40:37,448 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpua003:0/64] 2023-07-05 22:40:38,431 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. +[gpua003:0/64] 2023-07-05 22:40:38,458 (s2t:483) INFO: Vocabulary size: 50002 +[gpua003:0/64] 2023-07-05 22:40:52,612 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout):
Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, 
out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() 
+ ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, 
elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): 
Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 888.51 M + Number of trainable parameters: 888.51 M (100.0%) + Size: 3.55 GB + Type: torch.float32 +[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1205) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.00025 + lr: 2.5e-08 + maximize: False + weight_decay: 0.0 +) +[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000) +[gpua003:0/64] 2023-07-05 22:40:52,630 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml +[gpua003:0/64] 2023-07-05 22:40:53,329 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth +[gpua003:0/64] 2023-07-05 22:41:01,373 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 22:41:01,580 (abs_task:1570) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 22:41:01,580 (abs_task:1571) INFO: [valid] Batch sampler: 
UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua003:0/64] 2023-07-05 22:41:01,582 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpua003:0/64] 2023-07-05 22:41:02,091 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:41:02,408 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+ speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+ text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+ text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+ text: {"path": "dump/raw/dev/text", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-05 22:41:02,409 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua003:0/64] 2023-07-05 22:41:02,409 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpua003:0/64] 2023-07-05 22:41:33,411 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpua003:350633:350633 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350633:350633 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350633:350633 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpua003:0/64] 2023-07-05 22:41:38,247 (trainer:284) INFO: 14/100epoch started
+[gpua003:0/64] 2023-07-05 22:41:38,292 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-05 22:41:57,218 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:42:00,682 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-05 22:42:00,683 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-05 22:42:00,689 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpua031:1680700:1680700 [0] NCCL INFO cudaDriverVersion 12010
+gpua031:1680700:1680700 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680700:1680700 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680700:1680773 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680700:1680773 [0] NCCL INFO Using network IB
+gpua031:1680700:1680773 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua031:1680700:1680773 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua031:1680700:1680773 [0] NCCL INFO Connected all rings
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Connected all trees
+gpua031:1680700:1680773 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680700:1680773 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680700:1680773 [0] NCCL INFO comm 0xb9862e50 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua031:1680703:1680703 [3] NCCL INFO cudaDriverVersion 12010
+gpua031:1680703:1680703 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680703:1680703 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680703:1680772 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680703:1680772 [3] NCCL INFO Using network IB
+gpua031:1680703:1680772 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua031:1680703:1680772 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpua031:1680703:1680772 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpua031:1680703:1680772 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpua031:1680703:1680772 [3] NCCL INFO Connected all rings
+gpua031:1680703:1680772 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC/read
+gpua031:1680703:1680772 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC/read
+gpua031:1680703:1680772 [3] NCCL INFO Connected all trees
+gpua031:1680703:1680772 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680703:1680772 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680703:1680772 [3] NCCL INFO comm 0x5195fe00 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua031:1680702:1680702 [2] NCCL INFO cudaDriverVersion 12010
+gpua031:1680702:1680702 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680702:1680702 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680702:1680774 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680702:1680774 [2] NCCL INFO Using network IB
+gpua031:1680702:1680774 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua031:1680702:1680774 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpua031:1680702:1680774 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua031:1680702:1680774 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua031:1680702:1680774 [2] NCCL INFO Connected all rings
+gpua031:1680702:1680774 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC/read
+gpua031:1680702:1680774 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC/read
+gpua031:1680702:1680774 [2] NCCL INFO Connected all trees
+gpua031:1680702:1680774 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680702:1680774 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680702:1680774 [2] NCCL INFO comm 0x90042a50 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua031:1680701:1680701 [1] NCCL INFO cudaDriverVersion 12010
+gpua031:1680701:1680701 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680701:1680701 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680701:1680775 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680701:1680775 [1] NCCL INFO Using network IB
+gpua031:1680701:1680775 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua031:1680701:1680775 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpua031:1680701:1680775 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC/read
+gpua031:1680701:1680775 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC/read
+gpua031:1680701:1680775 [1] NCCL INFO Connected all rings
+gpua031:1680701:1680775 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpua031:1680701:1680775 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpua031:1680701:1680775 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC/read
+gpua031:1680701:1680775 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC/read
+gpua031:1680701:1680775 [1] NCCL INFO Connected all trees
+gpua031:1680701:1680775 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680701:1680775 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680701:1680775 [1] NCCL INFO comm 0xb74170b0 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua029:1226924:1226924 [3] NCCL INFO cudaDriverVersion 12010
+gpua029:1226924:1226924 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.29<0>
+gpua029:1226924:1226924 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua029:1226924:1226999 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.29<0>
+gpua029:1226924:1226999 [3] NCCL INFO Using network IB
+gpua029:1226924:1226999 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua029:1226924:1226999 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpua029:1226924:1226999 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpua029:1226924:1226999 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpua029:1226924:1226999 [3] NCCL INFO Connected all rings
+gpua029:1226924:1226999 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC/read
+gpua029:1226924:1226999 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC/read
+gpua029:1226924:1226999 [3] NCCL INFO Connected all trees
+gpua029:1226924:1226999 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua029:1226924:1226999 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua029:1226924:1226999 [3] NCCL INFO comm 0x502a1280 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua029:1226921:1226921 [0] NCCL INFO cudaDriverVersion 12010
+gpua029:1226921:1226921 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.29<0>
+gpua029:1226921:1226921 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua029:1226921:1226997 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.29<0>
+gpua029:1226921:1226997 [0] NCCL INFO Using network IB
+gpua029:1226921:1226997 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua029:1226921:1226997 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpua029:1226921:1226997 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC/read
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC/read
+gpua029:1226921:1226997 [0] NCCL INFO Connected all rings
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Connected all trees
+gpua029:1226921:1226997 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua029:1226921:1226997 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua029:1226921:1226997 [0] NCCL INFO comm 0x8dcadfd0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua074:989792:989792 [1] NCCL INFO cudaDriverVersion 12010
+gpua074:989792:989792 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.74<0>
+gpua074:989792:989792 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua074:989792:989862 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.74<0>
+gpua074:989792:989862 [1] NCCL INFO Using network IB
+gpua074:989792:989862 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua074:989792:989862 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48
+gpua074:989792:989862 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC/read
+gpua074:989792:989862 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC/read
+gpua074:989792:989862 [1] NCCL INFO Connected all rings
+gpua074:989792:989862 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0
+gpua074:989792:989862 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0
+gpua074:989792:989862 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC/read
+gpua074:989792:989862 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC/read
+gpua074:989792:989862 [1] NCCL INFO Connected all trees
+gpua074:989792:989862 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua074:989792:989862 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua074:989792:989862 [1] NCCL INFO comm 0x91b8e50 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua074:989794:989794 [3] NCCL INFO cudaDriverVersion 12010
+gpua074:989794:989794 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.74<0>
+gpua074:989794:989794 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua074:989794:989863 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.74<0>
+gpua074:989794:989863 [3] NCCL INFO Using network IB
+gpua074:989794:989863 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua074:989794:989863 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50
+gpua074:989794:989863 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpua074:989794:989863 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpua074:989794:989863 [3] NCCL INFO Connected all rings
+gpua074:989794:989863 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC/read
+gpua074:989794:989863 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC/read
+gpua074:989794:989863 [3] NCCL INFO Connected all trees
+gpua074:989794:989863 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua074:989794:989863 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua074:989794:989863 [3] NCCL INFO comm 0x51823d90 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua074:989793:989793 [2] NCCL INFO cudaDriverVersion 12010
+gpua074:989793:989793 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.74<0>
+gpua074:989793:989793 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua074:989793:989861 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.74<0>
+gpua074:989793:989861 [2] NCCL INFO Using network IB
+gpua074:989793:989861 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua074:989793:989861 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49
+gpua074:989793:989861 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC/read
+gpua074:989793:989861 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC/read
+gpua074:989793:989861 [2] NCCL INFO Connected all rings
+gpua074:989793:989861 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC/read
+gpua074:989793:989861 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC/read
+gpua074:989793:989861 [2] NCCL INFO Connected all trees
+gpua074:989793:989861 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua074:989793:989861 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua074:989793:989861 [2] NCCL INFO comm 0x50124340 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua029:1226922:1226922 [1] NCCL INFO cudaDriverVersion 12010
+gpua029:1226922:1226922 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.29<0>
+gpua029:1226922:1226922 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua029:1226922:1226996 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.29<0>
+gpua029:1226922:1226996 [1] NCCL INFO Using network IB
+gpua029:1226922:1226996 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua029:1226922:1226996 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpua029:1226922:1226996 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC/read
+gpua029:1226922:1226996 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC/read
+gpua029:1226922:1226996 [1] NCCL INFO Connected all rings
+gpua029:1226922:1226996 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpua029:1226922:1226996 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpua029:1226922:1226996 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC/read
+gpua029:1226922:1226996 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC/read
+gpua029:1226922:1226996 [1] NCCL INFO Connected all trees
+gpua029:1226922:1226996 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua029:1226922:1226996 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua029:1226922:1226996 [1] NCCL INFO comm 0x91446d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua087:2330953:2330953 [0] NCCL INFO cudaDriverVersion 12010
+gpua087:2330953:2330953 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330953:2330953 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330953:2331026 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330953:2331026 [0] NCCL INFO Using network IB
+gpua087:2330953:2331026 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua087:2330953:2331026 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45
+gpua087:2330953:2331026 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC/read
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC/read
+gpua087:2330953:2331026 [0] NCCL INFO Connected all rings
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Connected all trees
+gpua087:2330953:2331026 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua087:2330953:2331026 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua087:2330953:2331026 [0] NCCL INFO comm 0x8805010 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua087:2330955:2330955 [2] NCCL INFO cudaDriverVersion 12010
+gpua087:2330955:2330955 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330955:2330955 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330955:2331028 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330955:2331028 [2] NCCL INFO Using network IB
+gpua087:2330955:2331028 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua087:2330955:2331028 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53
+gpua087:2330955:2331028 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC/read
+gpua087:2330955:2331028 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC/read
+gpua087:2330955:2331028 [2] NCCL INFO Connected all rings
+gpua087:2330955:2331028 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC/read
+gpua087:2330955:2331028 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC/read
+gpua087:2330955:2331028 [2] NCCL INFO Connected all trees
+gpua087:2330955:2331028 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua087:2330955:2331028 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua087:2330955:2331028 [2] NCCL INFO comm 0x1091ecd0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua029:1226923:1226923 [2] NCCL INFO cudaDriverVersion 12010
+gpua029:1226923:1226923 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.29<0>
+gpua029:1226923:1226923 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua029:1226923:1226998 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.29<0>
+gpua029:1226923:1226998 [2] NCCL INFO Using network IB
+gpua029:1226923:1226998 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua029:1226923:1226998 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpua029:1226923:1226998 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC/read
+gpua029:1226923:1226998 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC/read
+gpua029:1226923:1226998 [2] NCCL INFO Connected all rings
+gpua029:1226923:1226998 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC/read
+gpua029:1226923:1226998 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC/read
+gpua029:1226923:1226998 [2] NCCL INFO Connected all trees
+gpua029:1226923:1226998 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua029:1226923:1226998 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua029:1226923:1226998 [2] NCCL INFO comm 0x9682050 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua057:1814426:1814426 [1] NCCL INFO cudaDriverVersion 12010
+gpua057:1814426:1814426 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:1814426:1814426 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua057:1814426:1814504 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0>
+gpua057:1814426:1814504 [1] NCCL INFO Using network IB
+gpua057:1814426:1814504 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua057:1814426:1814504 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpua057:1814426:1814504 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC/read
+gpua057:1814426:1814504 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC/read
+gpua057:1814426:1814504 [1] NCCL INFO Connected all rings
+gpua057:1814426:1814504 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0
+gpua057:1814426:1814504 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0
+gpua057:1814426:1814504 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC/read
+gpua057:1814426:1814504 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC/read
+gpua057:1814426:1814504 [1] NCCL INFO Connected all trees
+gpua057:1814426:1814504 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:1814426:1814504 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:1814426:1814504 [1] NCCL INFO comm 0xb6887810 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua074:989791:989791 [0] NCCL INFO cudaDriverVersion 12010
+gpua074:989791:989791 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.74<0>
+gpua074:989791:989791 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua074:989791:989864 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.74<0>
+gpua074:989791:989864 [0] NCCL INFO Using network IB
+gpua074:989791:989864 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua074:989791:989864 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC/read
+gpua074:989791:989864 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC/read
+gpua074:989791:989864 [0] NCCL INFO Connected all rings
+gpua057:1814428:1814428 [3] NCCL INFO cudaDriverVersion 12010
+gpua057:1814428:1814428 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:1814428:1814428 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua057:1814428:1814503 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0>
+gpua057:1814428:1814503 [3] NCCL INFO Using network IB
+gpua057:1814428:1814503 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua057:1814428:1814503 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpua057:1814428:1814503 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpua057:1814428:1814503 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpua057:1814428:1814503 [3] NCCL INFO Connected all rings
+gpua057:1814428:1814503 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC/read
+gpua057:1814428:1814503 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC/read
+gpua074:989791:989864 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Connected all trees
+gpua074:989791:989864 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua074:989791:989864 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua074:989791:989864 [0] NCCL INFO comm 0x4f541ea0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua057:1814428:1814503 [3] NCCL INFO Connected all trees
+gpua057:1814428:1814503 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:1814428:1814503 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:1814428:1814503 [3] NCCL INFO comm 0xa830f510 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua057:1814427:1814427 [2] NCCL INFO cudaDriverVersion 12010
+gpua057:1814427:1814427 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:1814427:1814427 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua057:1814427:1814505 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0>
+gpua057:1814427:1814505 [2] NCCL INFO Using network IB
+gpua057:1814427:1814505 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua057:1814427:1814505 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41
+gpua057:1814427:1814505 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC/read
+gpua057:1814427:1814505 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC/read
+gpua057:1814427:1814505 [2] NCCL INFO Connected all rings
+gpua057:1814427:1814505 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC/read
+gpua057:1814427:1814505 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC/read
+gpua057:1814427:1814505 [2] NCCL INFO Connected all trees
+gpua057:1814427:1814505 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:1814427:1814505 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:1814427:1814505 [2] NCCL INFO comm 0x8ff8bf0 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua087:2330956:2330956 [3] NCCL INFO cudaDriverVersion 12010
+gpua087:2330956:2330956 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330956:2330956 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330956:2331027 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330956:2331027 [3] NCCL INFO Using network IB
+gpua087:2330956:2331027 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua087:2330956:2331027 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54
+gpua087:2330956:2331027 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpua087:2330956:2331027 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpua087:2330956:2331027 [3] NCCL INFO Connected all rings
+gpua087:2330956:2331027 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC/read
+gpua087:2330956:2331027 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC/read
+gpua057:1814425:1814425 [0] NCCL INFO cudaDriverVersion 12010
+gpua057:1814425:1814425 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:1814425:1814425 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua057:1814425:1814506 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0>
+gpua057:1814425:1814506 [0] NCCL INFO Using network IB
+gpua057:1814425:1814506 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua057:1814425:1814506 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC/read
+gpua057:1814425:1814506 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC/read
+gpua057:1814425:1814506 [0] NCCL INFO Connected all rings
+gpua087:2330956:2331027 [3] NCCL INFO Connected all trees
+gpua087:2330956:2331027 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua087:2330956:2331027 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua087:2330956:2331027 [3] NCCL INFO comm 0x4fa40250 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua057:1814425:1814506 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Connected all trees
+gpua057:1814425:1814506 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:1814425:1814506 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:1814425:1814506 [0] NCCL INFO comm 0xc0b1e520 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua087:2330954:2330954 [1] NCCL INFO cudaDriverVersion 12010
+gpua087:2330954:2330954 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330954:2330954 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330954:2331029 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330954:2331029 [1] NCCL INFO Using network IB
+gpua087:2330954:2331029 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua087:2330954:2331029 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52
+gpua087:2330954:2331029 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read
+gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read
+gpua087:2330954:2331029 [1] NCCL INFO Connected all rings
+gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0
+gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0
+gpua087:2330954:2331029 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read
+gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read
+gpua087:2330954:2331029 [1] NCCL INFO Connected all trees
+gpua087:2330954:2331029 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua087:2330954:2331029 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua087:2330954:2331029 [1] NCCL INFO comm 0xbc380f30 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua028:3269324:3269324 [3] NCCL INFO cudaDriverVersion 12010
+gpua028:3269324:3269324 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0>
+gpua028:3269324:3269324 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua028:3269324:3269401 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0>
+gpua028:3269324:3269401 [3] NCCL INFO Using network IB
+gpua028:3269324:3269401 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua028:3269324:3269401 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpua028:3269324:3269401 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpua028:3269324:3269401 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpua028:3269324:3269401 [3] NCCL INFO Connected all rings
+gpua028:3269324:3269401 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read
+gpua028:3269324:3269401 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read
+gpua028:3269324:3269401 [3] NCCL INFO Connected all trees
+gpua028:3269324:3269401 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua028:3269324:3269401 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua028:3269324:3269401 [3] NCCL INFO comm 0x50758ff0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua053:959076:959076 [2] NCCL INFO cudaDriverVersion 12010
+gpua053:959076:959076 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0>
+gpua053:959076:959076 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua053:959076:959150 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0>
+gpua053:959076:959150 [2] NCCL INFO Using network IB
+gpua053:959076:959150 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua053:959076:959150 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33
+gpua053:959076:959150 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read
+gpua053:959076:959150 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read
+gpua053:959076:959150 [2] NCCL INFO Connected all rings
+gpua053:959076:959150 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read
+gpua053:959076:959150 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read
+gpua053:959076:959150 [2] NCCL INFO Connected all trees
+gpua053:959076:959150 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua053:959076:959150 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua053:959076:959150 [2] NCCL INFO comm 0xa5547430 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua028:3269322:3269322 [1] NCCL INFO cudaDriverVersion 12010
+gpua028:3269322:3269322 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0>
+gpua028:3269322:3269322 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua028:3269322:3269404 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0>
+gpua028:3269322:3269404 [1] NCCL INFO Using network IB
+gpua028:3269322:3269404 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua028:3269322:3269404 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read
+gpua028:3269322:3269404 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read
+gpua028:3269322:3269404 [1] NCCL INFO Connected all rings
+gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read
+gpua028:3269322:3269404 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read
+gpua028:3269322:3269404 [1] NCCL INFO Connected all trees
+gpua028:3269322:3269404 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua028:3269322:3269404 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua028:3269322:3269404 [1] NCCL INFO comm 0x50ff9ba0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua053:959074:959074 [0] NCCL INFO cudaDriverVersion 12010
+gpua053:959074:959074 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0>
+gpua053:959074:959074 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua053:959074:959149 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0>
+gpua053:959074:959149 [0] NCCL INFO Using network IB
+gpua053:959074:959149 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua053:959074:959149 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read
+gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read
+gpua053:959074:959149 [0] NCCL INFO Connected all rings
+gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Connected all trees
+gpua053:959074:959149 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua053:959074:959149 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua053:959074:959149 [0] NCCL INFO comm 0x50589df0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua028:3269321:3269321 [0] NCCL INFO cudaDriverVersion 12010
+gpua028:3269321:3269321 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0>
+gpua028:3269321:3269321 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua028:3269321:3269403 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0>
+gpua028:3269321:3269403 [0] NCCL INFO Using network IB
+gpua028:3269321:3269403 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua028:3269321:3269403 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read
+gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read
+gpua028:3269321:3269403 [0] NCCL INFO Connected all rings
+gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Connected all trees
+gpua028:3269321:3269403 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua028:3269321:3269403 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua028:3269321:3269403 [0] NCCL INFO comm 0xc37df860 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua028:3269323:3269323 [2] NCCL INFO cudaDriverVersion 12010
+gpua028:3269323:3269323 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0>
+gpua028:3269323:3269323 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua028:3269323:3269402 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0>
+gpua028:3269323:3269402 [2] NCCL INFO Using network IB
+gpua028:3269323:3269402 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua028:3269323:3269402 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17
+gpua028:3269323:3269402 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read
+gpua028:3269323:3269402 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read
+gpua028:3269323:3269402 [2] NCCL INFO Connected all rings
+gpua028:3269323:3269402 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read
+gpua028:3269323:3269402 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read
+gpua028:3269323:3269402 [2] NCCL INFO Connected all trees
+gpua028:3269323:3269402 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua028:3269323:3269402 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua028:3269323:3269402 [2] NCCL INFO comm 0x4fe1d010 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua053:959077:959077 [3] NCCL INFO cudaDriverVersion 12010
+gpua053:959077:959077 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0>
+gpua053:959077:959077 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua053:959077:959151 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0>
+gpua053:959077:959151 [3] NCCL INFO Using network IB
+gpua053:959077:959151 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua053:959077:959151 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34
+gpua053:959077:959151 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpua053:959077:959151 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpua053:959077:959151 [3] NCCL INFO Connected all rings
+gpua053:959077:959151 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read
+gpua053:959077:959151 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read
+gpua053:959077:959151 [3] NCCL INFO Connected all trees
+gpua053:959077:959151 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua053:959077:959151 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua053:959077:959151 [3] NCCL INFO comm 0x8f7ecf20 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua053:959075:959075 [1] NCCL INFO cudaDriverVersion 12010
+gpua053:959075:959075 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0>
+gpua053:959075:959075 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua053:959075:959152 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0>
+gpua053:959075:959152 [1] NCCL INFO Using network IB
+gpua053:959075:959152 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua053:959075:959152 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua053:959075:959152 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua053:959075:959152 [1] NCCL INFO Connected all rings
+gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua053:959075:959152 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua053:959075:959152 [1] NCCL INFO Connected all trees
+gpua053:959075:959152 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua053:959075:959152 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua053:959075:959152 [1] NCCL INFO comm 0x50f9bf70 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua055:3866106:3866106 [3] NCCL INFO cudaDriverVersion 12010
+gpua055:3866106:3866106 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:3866106:3866106 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua055:3866106:3866180 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0>
+gpua055:3866106:3866180 [3] NCCL INFO Using network IB
+gpua055:3866106:3866180 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua055:3866106:3866180 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38
+gpua055:3866106:3866180 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpua055:3866106:3866180 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpua055:3866106:3866180 [3] NCCL INFO Connected all rings
+gpua055:3866106:3866180 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read
+gpua055:3866106:3866180 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read
+gpua055:3866106:3866180 [3] NCCL INFO Connected all trees
+gpua055:3866106:3866180 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:3866106:3866180 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:3866106:3866180 [3] NCCL INFO comm 0xb731bb50 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua055:3866104:3866104 [1] NCCL INFO cudaDriverVersion 12010
+gpua055:3866104:3866104 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:3866104:3866104 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua055:3866104:3866182 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0>
+gpua055:3866104:3866182 [1] NCCL INFO Using network IB
+gpua055:3866104:3866182 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua055:3866104:3866182 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36
+gpua055:3866104:3866182 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read
+gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read
+gpua055:3866104:3866182 [1] NCCL INFO Connected all rings
+gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0
+gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0
+gpua055:3866104:3866182 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read
+gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read
+gpua055:3866104:3866182 [1] NCCL INFO Connected all trees
+gpua055:3866104:3866182 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:3866104:3866182 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:3866104:3866182 [1] NCCL INFO comm 0x4ff24650 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua055:3866103:3866103 [0] NCCL INFO cudaDriverVersion 12010
+gpua055:3866103:3866103 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:3866103:3866103 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua055:3866103:3866183 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0>
+gpua055:3866103:3866183 [0] NCCL INFO Using network IB
+gpua055:3866103:3866183 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua055:3866103:3866183 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44
+gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read
+gpua055:3866103:3866183 [0] NCCL INFO Connected all rings
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Connected all trees
+gpua055:3866103:3866183 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:3866103:3866183 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:3866103:3866183 [0] NCCL INFO comm 0x8783410 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua055:3866105:3866105 [2] NCCL INFO cudaDriverVersion 12010
+gpua055:3866105:3866105 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:3866105:3866105 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua055:3866105:3866181 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0>
+gpua055:3866105:3866181 [2] NCCL INFO Using network IB
+gpua055:3866105:3866181 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua055:3866105:3866181 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpua055:3866105:3866181 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read
+gpua055:3866105:3866181 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read
+gpua055:3866105:3866181 [2] NCCL INFO Connected all rings
+gpua055:3866105:3866181 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read
+gpua055:3866105:3866181 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read
+gpua055:3866105:3866181 [2] NCCL INFO Connected all trees
+gpua055:3866105:3866181 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:3866105:3866181 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:3866105:3866181 [2] NCCL INFO comm 0xa0bacc0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua098:2101209:2101209 [1] NCCL INFO cudaDriverVersion 12010
+gpua098:2101209:2101209 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0>
+gpua098:2101209:2101209 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua098:2101209:2101288 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0>
+gpua098:2101209:2101288 [1] NCCL INFO Using network IB
+gpua098:2101209:2101288 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua098:2101209:2101288 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60
+gpua098:2101209:2101288 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read
+gpua098:2101209:2101288 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read
+gpua098:2101209:2101288 [1] NCCL INFO Connected all rings
+gpua098:2101209:2101288 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read
+gpua098:2101209:2101288 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read
+gpua098:2101209:2101288 [1] NCCL INFO Connected all trees
+gpua098:2101209:2101288 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua098:2101209:2101288 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua098:2101209:2101288 [1] NCCL INFO comm 0xb77452f0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua098:2101208:2101208 [0] NCCL INFO cudaDriverVersion 12010
+gpua098:2101208:2101208 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0>
+gpua098:2101208:2101208 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua098:2101208:2101291 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0>
+gpua098:2101208:2101291 [0] NCCL INFO Using network IB
+gpua098:2101208:2101291 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua098:2101208:2101291 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read
+gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read
+gpua098:2101208:2101291 [0] NCCL INFO Connected all rings
+gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Connected all trees
+gpua098:2101208:2101291 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua098:2101208:2101291 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua098:2101208:2101291 [0] NCCL INFO comm 0x8ba9dc20 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua098:2101210:2101210 [2] NCCL INFO cudaDriverVersion 12010
+gpua098:2101210:2101210 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0>
+gpua098:2101210:2101210 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua098:2101210:2101290 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0>
+gpua098:2101210:2101290 [2] NCCL INFO Using network IB
+gpua098:2101210:2101290 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua098:2101210:2101290 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61
+gpua098:2101210:2101290 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read
+gpua098:2101210:2101290 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read
+gpua098:2101210:2101290 [2] NCCL INFO Connected all rings
+gpua098:2101210:2101290 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read
+gpua098:2101210:2101290 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read
+gpua098:2101210:2101290 [2] NCCL INFO Connected all trees
+gpua098:2101210:2101290 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua098:2101210:2101290 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua098:2101210:2101290 [2] NCCL INFO comm 0xb13e4b0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua090:2294100:2294100 [3] NCCL INFO cudaDriverVersion 12010
+gpua090:2294100:2294100 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0>
+gpua090:2294100:2294100 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua090:2294100:2294189 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0>
+gpua090:2294100:2294189 [3] NCCL INFO Using network IB
+gpua090:2294100:2294189 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua090:2294100:2294189 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58
+gpua090:2294100:2294189 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpua090:2294100:2294189 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpua090:2294100:2294189 [3] NCCL INFO Connected all rings
+gpua090:2294100:2294189 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read
+gpua090:2294100:2294189 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read
+gpua090:2294100:2294189 [3] NCCL INFO Connected all trees
+gpua090:2294100:2294189 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua090:2294100:2294189 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua090:2294100:2294189 [3] NCCL INFO comm 0x8d2a2250 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua090:2294099:2294099 [2] NCCL INFO cudaDriverVersion 12010
+gpua090:2294099:2294099 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0>
+gpua090:2294099:2294099 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua090:2294099:2294186 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0>
+gpua090:2294099:2294186 [2] NCCL INFO Using network IB
+gpua090:2294099:2294186 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua090:2294099:2294186 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpua090:2294099:2294186 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read
+gpua090:2294099:2294186 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read
+gpua090:2294099:2294186 [2] NCCL INFO Connected all rings
+gpua090:2294099:2294186 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read
+gpua090:2294099:2294186 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read
+gpua090:2294099:2294186 [2] NCCL INFO Connected all trees
+gpua090:2294099:2294186 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua090:2294099:2294186 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua090:2294099:2294186 [2] NCCL INFO comm 0x508070c0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua003:350634:350634 [1] NCCL INFO cudaDriverVersion 12010
+gpua003:350634:350634 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350634:350634 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350634:350707 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0>
+gpua003:350634:350707 [1] NCCL INFO Using network IB
+gpua003:350634:350707 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua003:350634:350707 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpua003:350634:350707 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read
+gpua003:350634:350707 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read
+gpua003:350634:350707 [1] NCCL INFO Connected all rings
+gpua003:350634:350707 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read
+gpua003:350634:350707 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read
+gpua003:350634:350707 [1] NCCL INFO Connected all trees
+gpua003:350634:350707 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua003:350634:350707 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua003:350634:350707 [1] NCCL INFO comm 0xb8217e10 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua090:2294098:2294098 [1] NCCL INFO cudaDriverVersion 12010
+gpua090:2294098:2294098 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0>
+gpua090:2294098:2294098 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua090:2294098:2294187 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0>
+gpua090:2294098:2294187 [1] NCCL INFO Using network IB
+gpua090:2294098:2294187 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua090:2294098:2294187 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read
+gpua090:2294098:2294187 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read
+gpua090:2294098:2294187 [1] NCCL INFO Connected all rings
+gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0
+gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0
+gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read
+gpua090:2294098:2294187 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read
+gpua090:2294098:2294187 [1] NCCL INFO Connected all trees
+gpua090:2294098:2294187 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua090:2294098:2294187 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua090:2294098:2294187 [1] NCCL INFO comm 0xb9291470 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua098:2101211:2101211 [3] NCCL INFO cudaDriverVersion 12010
+gpua098:2101211:2101211 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0>
+gpua098:2101211:2101211 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua098:2101211:2101289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0>
+gpua098:2101211:2101289 [3] NCCL INFO Using network IB
+gpua098:2101211:2101289 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua098:2101211:2101289 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpua098:2101211:2101289 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpua098:2101211:2101289 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpua098:2101211:2101289 [3] NCCL INFO Connected all rings
+gpua098:2101211:2101289 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read
+gpua098:2101211:2101289 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read
+gpua098:2101211:2101289 [3] NCCL INFO Connected all trees
+gpua098:2101211:2101289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua098:2101211:2101289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua098:2101211:2101289 [3] NCCL INFO comm 0xb9e844a0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua003:350636:350636 [3] NCCL INFO cudaDriverVersion 12010
+gpua003:350636:350636 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350636:350636 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350636:350708 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0>
+gpua003:350636:350708 [3] NCCL INFO Using network IB
+gpua003:350636:350708 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua003:350636:350708 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpua003:350636:350708 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpua003:350636:350708 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpua003:350636:350708 [3] NCCL INFO Connected all rings
+gpua003:350636:350708 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read
+gpua003:350636:350708 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read
+gpua003:350636:350708 [3] NCCL INFO Connected all trees
+gpua003:350636:350708 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua003:350636:350708 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua003:350636:350708 [3] NCCL INFO comm 0x8b901f80 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua010:1622002:1622002 [2] NCCL INFO cudaDriverVersion 12010
+gpua010:1622002:1622002 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0>
+gpua010:1622002:1622002 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua010:1622002:1622073 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0>
+gpua010:1622002:1622073 [2] NCCL INFO Using network IB
+gpua010:1622002:1622073 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua010:1622002:1622073 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9
+gpua010:1622002:1622073 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read
+gpua010:1622002:1622073 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read
+gpua010:1622002:1622073 [2] NCCL INFO Connected all rings
+gpua010:1622002:1622073 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read
+gpua010:1622002:1622073 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read
+gpua010:1622002:1622073 [2] NCCL INFO Connected all trees
+gpua010:1622002:1622073 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua010:1622002:1622073 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua010:1622002:1622073 [2] NCCL INFO comm 0x95597d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua003:350633:350706 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0>
+gpua003:350633:350706 [0] NCCL INFO Using network IB
+gpua003:350633:350706 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua003:350633:350706 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+gpua003:350633:350706 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+gpua003:350633:350706 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4
+gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read
+gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read
+gpua003:350633:350706 [0] NCCL INFO Connected all rings
+gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Connected all trees
+gpua003:350633:350706 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua003:350633:350706 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua003:350633:350706 [0] NCCL INFO comm 0x505c0d10 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua003:350635:350635 [2] NCCL INFO cudaDriverVersion 12010
+gpua003:350635:350635 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350635:350635 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350635:350709 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0>
+gpua003:350635:350709 [2] NCCL INFO Using network IB
+gpua003:350635:350709 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua003:350635:350709 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
+gpua003:350635:350709 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read
+gpua003:350635:350709 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read
+gpua003:350635:350709 [2] NCCL INFO Connected all rings
+gpua003:350635:350709 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read
+gpua003:350635:350709 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read
+gpua003:350635:350709 [2] NCCL INFO Connected all trees
+gpua003:350635:350709 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua003:350635:350709 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua003:350635:350709 [2] NCCL INFO comm 0xc165ff50 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua090:2294097:2294097 [0] NCCL INFO cudaDriverVersion 12010
+gpua090:2294097:2294097 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0>
+gpua090:2294097:2294097 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua090:2294097:2294188 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0>
+gpua090:2294097:2294188 [0] NCCL INFO Using network IB
+gpua090:2294097:2294188 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua090:2294097:2294188 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read
+gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read
+gpua090:2294097:2294188 [0] NCCL INFO Connected all rings
+gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Connected all trees
+gpua090:2294097:2294188 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua090:2294097:2294188 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua090:2294097:2294188 [0] NCCL INFO comm 0x4ed27c50 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua010:1622003:1622003 [3] NCCL INFO cudaDriverVersion 12010
+gpua010:1622003:1622003 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0>
+gpua010:1622003:1622003 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua010:1622003:1622076 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0>
+gpua010:1622003:1622076 [3] NCCL INFO Using network IB
+gpua010:1622003:1622076 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua010:1622003:1622076 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpua010:1622003:1622076 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpua010:1622003:1622076 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpua010:1622003:1622076 [3] NCCL INFO Connected all rings
+gpua010:1622003:1622076 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read
+gpua010:1622003:1622076 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read
+gpua010:1622003:1622076 [3] NCCL INFO Connected all trees
+gpua010:1622003:1622076 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua010:1622003:1622076 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua010:1622003:1622076 [3] NCCL INFO comm 0x9c22310 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua010:1622000:1622000 [0] NCCL INFO cudaDriverVersion 12010
+gpua010:1622000:1622000 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0>
+gpua010:1622000:1622000 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua010:1622000:1622074 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0>
+gpua010:1622000:1622074 [0] NCCL INFO Using network IB
+gpua010:1622000:1622074 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua010:1622000:1622074 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read
+gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read
+gpua010:1622000:1622074 [0] NCCL INFO Connected all rings
+gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel
01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Connected all trees +gpua010:1622000:1622074 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622000:1622074 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622000:1622074 [0] NCCL INFO comm 0xc2d78fd0 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua010:1622001:1622001 [1] NCCL INFO cudaDriverVersion 12010 +gpua010:1622001:1622001 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622001:1622001 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua010:1622001:1622075 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622001:1622075 [1] NCCL INFO Using network IB +gpua010:1622001:1622075 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua010:1622001:1622075 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Connected all rings +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Connected all trees +gpua010:1622001:1622075 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622001:1622075 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622001:1622075 [1] NCCL INFO comm 0x8e6a9490 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua060:2854971:2854971 [3] NCCL INFO cudaDriverVersion 12010 +gpua060:2854971:2854971 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854971:2854971 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854971:2855041 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854971:2855041 [3] NCCL INFO Using network IB +gpua060:2854971:2855041 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua060:2854971:2855041 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpua060:2854971:2855041 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2854971:2855041 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2854971:2855041 [3] NCCL INFO Connected all rings +gpua060:2854971:2855041 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua060:2854971:2855041 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua025:63838:63838 [2] NCCL INFO cudaDriverVersion 12010 +gpua025:63838:63838 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63838:63838 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63838:63912 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63838:63912 [2] NCCL INFO Using network IB +gpua025:63838:63912 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua025:63838:63912 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpua025:63838:63912 [2] NCCL INFO 
Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Connected all rings +gpua025:63838:63912 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Connected all trees +gpua060:2854971:2855041 [3] NCCL INFO Connected all trees +gpua060:2854971:2855041 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854971:2855041 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2854971:2855041 [3] NCCL INFO comm 0xb6f9a6a0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua025:63838:63912 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua025:63838:63912 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua025:63838:63912 [2] NCCL INFO comm 0xc1f876b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua025:63837:63837 [1] NCCL INFO cudaDriverVersion 12010 +gpua025:63837:63837 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63837:63837 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63837:63913 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63837:63913 [1] NCCL INFO Using network IB +gpua025:63837:63913 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua025:63837:63913 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpua025:63837:63913 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read +gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read +gpua025:63837:63913 [1] NCCL INFO Connected all rings +gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpua025:63837:63913 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read +gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read +gpua025:63837:63913 [1] NCCL INFO Connected all trees +gpua025:63837:63913 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua025:63837:63913 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua025:63837:63913 [1] NCCL INFO comm 0xa196ac90 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua025:63839:63839 [3] NCCL INFO cudaDriverVersion 12010 +gpua025:63839:63839 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63839:63839 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63839:63914 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63839:63914 [3] NCCL INFO Using network IB +gpua025:63839:63914 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua025:63839:63914 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpua025:63839:63914 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpua025:63839:63914 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpua025:63839:63914 [3] NCCL INFO Connected all rings +gpua025:63839:63914 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read +gpua025:63839:63914 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read +gpua025:63839:63914 [3] 
NCCL INFO Connected all trees +gpua025:63839:63914 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua025:63839:63914 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua025:63839:63914 [3] NCCL INFO comm 0xc1e534d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua060:2854968:2854968 [0] NCCL INFO cudaDriverVersion 12010 +gpua060:2854968:2854968 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854968:2854968 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854968:2855043 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854968:2855043 [0] NCCL INFO Using network IB +gpua060:2854968:2855043 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua060:2854968:2855043 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua060:2854968:2855043 [0] NCCL INFO Connected all rings +gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Connected all trees +gpua060:2854968:2855043 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854968:2855043 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2854968:2855043 [0] NCCL INFO comm 0x9da77350 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua060:2854970:2854970 [2] NCCL INFO cudaDriverVersion 12010 +gpua060:2854970:2854970 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854970:2854970 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854970:2855044 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854970:2855044 [2] NCCL INFO Using network IB +gpua060:2854970:2855044 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua060:2854970:2855044 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpua060:2854970:2855044 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua060:2854970:2855044 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua060:2854970:2855044 [2] NCCL INFO Connected all rings +gpua060:2854970:2855044 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua060:2854970:2855044 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua060:2854970:2855044 [2] NCCL INFO Connected all trees +gpua060:2854970:2855044 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854970:2855044 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
+gpua060:2854970:2855044 [2] NCCL INFO comm 0xb4b68d30 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua025:63836:63836 [0] NCCL INFO cudaDriverVersion 12010 +gpua025:63836:63836 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63836:63836 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63836:63915 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63836:63915 [0] NCCL INFO Using network IB +gpua025:63836:63915 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua025:63836:63915 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua025:63836:63915 [0] NCCL INFO Connected all rings +gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Connected all trees +gpua025:63836:63915 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua025:63836:63915 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua025:63836:63915 [0] NCCL INFO comm 0x1772ec20 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua060:2854969:2854969 [1] NCCL INFO cudaDriverVersion 12010 +gpua060:2854969:2854969 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854969:2854969 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854969:2855042 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854969:2855042 [1] NCCL INFO Using network IB +gpua060:2854969:2855042 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua060:2854969:2855042 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpua060:2854969:2855042 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua060:2854969:2855042 [1] NCCL INFO Connected all rings +gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpua060:2854969:2855042 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua060:2854969:2855042 [1] NCCL INFO Connected all trees +gpua060:2854969:2855042 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854969:2855042 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2854969:2855042 [1] NCCL INFO comm 0x8c2cb6d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE 
+gpua005:322787:322787 [2] NCCL INFO cudaDriverVersion 12010 +gpua005:322787:322787 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0> +gpua005:322787:322787 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua005:322787:322863 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0> +gpua005:322787:322863 [2] NCCL INFO Using network IB +gpua005:322787:322863 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua005:322787:322863 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpua005:322787:322863 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua005:322787:322863 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua005:322787:322863 [2] NCCL INFO Connected all rings +gpua005:322787:322863 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua005:322787:322863 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua005:322787:322863 [2] NCCL INFO Connected all trees +gpua005:322787:322863 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua005:322787:322863 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua005:322787:322863 [2] NCCL INFO comm 0xa671d450 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua005:322788:322788 [3] NCCL INFO cudaDriverVersion 12010 +gpua005:322788:322788 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0> +gpua005:322788:322788 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua005:322788:322860 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0> +gpua005:322788:322860 [3] NCCL INFO Using network IB +gpua005:322788:322860 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua005:322788:322860 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpua005:322788:322860 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua005:322788:322860 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua005:322788:322860 [3] NCCL INFO Connected all rings +gpua005:322788:322860 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua005:322788:322860 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua005:322788:322860 [3] NCCL INFO Connected all trees +gpua005:322788:322860 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua005:322788:322860 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua005:322788:322860 [3] NCCL INFO comm 0xb7586590 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua005:322785:322785 [0] NCCL INFO cudaDriverVersion 12010 +gpua005:322785:322785 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0> +gpua005:322785:322785 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua005:322785:322861 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0> +gpua005:322785:322861 [0] NCCL INFO Using network IB +gpua005:322785:322861 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua005:322785:322861 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 
5[46000] via P2P/IPC/read +gpua005:322785:322861 [0] NCCL INFO Connected all rings +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Connected all trees +gpua005:322785:322861 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua005:322785:322861 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua005:322785:322861 [0] NCCL INFO comm 0xbdcfe00 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua005:322786:322786 [1] NCCL INFO cudaDriverVersion 12010 +gpua005:322786:322786 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0> +gpua005:322786:322786 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua005:322786:322862 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0> +gpua005:322786:322862 [1] NCCL INFO Using network IB +gpua005:322786:322862 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua005:322786:322862 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpua005:322786:322862 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua005:322786:322862 [1] NCCL INFO Connected all rings +gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpua005:322786:322862 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua005:322786:322862 [1] NCCL INFO Connected all trees +gpua005:322786:322862 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua005:322786:322862 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua005:322786:322862 [1] NCCL INFO comm 0x9e527b50 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua035:1685218:1685218 [2] NCCL INFO cudaDriverVersion 12010 +gpua035:1685218:1685218 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:1685218:1685218 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:1685218:1685292 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:1685218:1685292 [2] NCCL INFO Using network IB +gpua035:1685218:1685292 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua035:1685218:1685292 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpua035:1685218:1685292 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua035:1685218:1685292 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua035:1685218:1685292 [2] NCCL INFO Connected all rings +gpua035:1685218:1685292 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua035:1685218:1685292 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua035:1685218:1685292 [2] NCCL INFO Connected 
all trees +gpua035:1685218:1685292 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:1685218:1685292 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:1685218:1685292 [2] NCCL INFO comm 0x5149e590 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua035:1685217:1685217 [1] NCCL INFO cudaDriverVersion 12010 +gpua035:1685217:1685217 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:1685217:1685217 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:1685217:1685295 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:1685217:1685295 [1] NCCL INFO Using network IB +gpua035:1685217:1685295 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua035:1685217:1685295 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpua035:1685217:1685295 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua035:1685217:1685295 [1] NCCL INFO Connected all rings +gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpua035:1685217:1685295 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua035:1685217:1685295 [1] NCCL INFO Connected all trees +gpua035:1685217:1685295 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:1685217:1685295 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:1685217:1685295 [1] NCCL INFO comm 0x94073350 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua035:1685219:1685219 [3] NCCL INFO cudaDriverVersion 12010 +gpua035:1685219:1685219 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:1685219:1685219 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:1685219:1685293 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:1685219:1685293 [3] NCCL INFO Using network IB +gpua035:1685219:1685293 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua035:1685219:1685293 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpua035:1685219:1685293 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua035:1685219:1685293 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua035:1685219:1685293 [3] NCCL INFO Connected all rings +gpua035:1685219:1685293 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua035:1685219:1685293 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua035:1685219:1685293 [3] NCCL INFO Connected all trees +gpua035:1685219:1685293 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:1685219:1685293 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:1685219:1685293 [3] NCCL INFO comm 0x9d08f8e0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua035:1685216:1685216 [0] NCCL INFO cudaDriverVersion 12010 +gpua035:1685216:1685216 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:1685216:1685216 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:1685216:1685294 [0] NCCL INFO NET/IB : Using 
[0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:1685216:1685294 [0] NCCL INFO Using network IB +gpua035:1685216:1685294 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua035:1685216:1685294 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua035:1685216:1685294 [0] NCCL INFO Connected all rings +gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Connected all trees +gpua035:1685216:1685294 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:1685216:1685294 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:1685216:1685294 [0] NCCL INFO comm 0x8b5a90d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
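Every rank above ends with "Init COMPLETE", meaning the 64-rank NCCL communicator is fully wired: intra-node channels go over P2P/IPC, inter-node channels over NET/IB on the mlx5_0 RoCE port, arranged into the ring and tree topologies printed per rank. A minimal sketch of the kind of collective that now flows over those channels; this is a hypothetical standalone smoke test, not part of ESPnet, and the script name is invented:

import os
import torch
import torch.distributed as dist

# Hypothetical launch, matching this job's shape:
#   torchrun --nnodes=16 --nproc_per_node=4 nccl_check.py
# torchrun sets RANK, LOCAL_RANK, WORLD_SIZE, and MASTER_ADDR.
dist.init_process_group(backend="nccl")  # with NCCL_DEBUG=INFO, init emits topology lines like those above
local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)

# One all-reduce, the gradient-averaging primitive DDP relies on,
# routed over the ring/tree channels NCCL just reported.
x = torch.ones(1, device="cuda") * dist.get_rank()
dist.all_reduce(x, op=dist.ReduceOp.SUM)

if dist.get_rank() == 0:
    # 0 + 1 + ... + 63 = 2016 for the 64 ranks in this run
    print(f"sum over {dist.get_world_size()} ranks = {x.item()}")
dist.destroy_process_group()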
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
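The block above is PyTorch's DDP reducer warning; it is printed by every worker, which is why it repeats throughout the raw log of this 64-rank job. For context, here is a minimal sketch of the kind of construction the warning refers to. The model and the single-process setup are illustrative placeholders, not the ESPnet trainer's actual wrapping code, which does not appear in this log:

```python
import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

# Single-process process group just so the sketch runs standalone; the real
# job is launched via SLURM/srun as shown at the top of this log.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)

model = torch.nn.Linear(10, 10)  # stand-in for the actual S2T model

# find_unused_parameters=True makes the DDP reducer traverse the autograd
# graph every iteration to mark parameters that received no gradient; that
# extra traversal is the overhead the warning describes.
ddp_model = DDP(model, find_unused_parameters=True)

# If every parameter really is used in every forward pass, the cheaper
# default (find_unused_parameters=False) avoids the traversal.
```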
+[gpua003:0/64] 2023-07-05 22:46:27,131 (trainer:732) INFO: 14epoch:train:1-100batch: iter_time=1.256, forward_time=0.181, loss_ctc=67.478, loss_att=50.061, acc=0.683, loss=55.286, backward_time=0.765, grad_norm=84.127, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.113, optim0_lr0=1.020e-04, train_time=5.776
+[gpua003:0/64] 2023-07-05 22:48:06,129 (trainer:732) INFO: 14epoch:train:101-200batch: iter_time=1.036e-04, forward_time=0.104, loss_ctc=76.315, loss_att=60.252, acc=0.659, loss=65.071, backward_time=0.747, grad_norm=106.131, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.020e-04, train_time=1.980
+[gpua003:0/64] 2023-07-05 22:49:44,933 (trainer:732) INFO: 14epoch:train:201-300batch: iter_time=1.112e-04, forward_time=0.104, loss_ctc=71.342, loss_att=53.820, acc=0.681, loss=59.077, backward_time=0.744, grad_norm=88.859, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.020e-04, train_time=1.976
+[gpua003:0/64] 2023-07-05 22:51:23,717 (trainer:732) INFO: 14epoch:train:301-400batch: iter_time=1.061e-04, forward_time=0.104, loss_ctc=74.278, loss_att=54.336, acc=0.672, loss=60.318, backward_time=0.744, grad_norm=83.344, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.019e-04, train_time=1.975
+[gpua003:0/64] 2023-07-05 22:53:02,383 (trainer:732) INFO: 14epoch:train:401-500batch: iter_time=9.902e-05, forward_time=0.104, loss_ctc=73.819, loss_att=59.568, acc=0.675, loss=63.843, backward_time=0.745, grad_norm=90.212, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.019e-04, train_time=1.973
+[gpua003:0/64] 2023-07-05 22:54:41,341 (trainer:732) INFO: 14epoch:train:501-600batch: iter_time=9.856e-05, forward_time=0.105, loss_ctc=67.201, loss_att=54.515, acc=0.666, loss=58.321, backward_time=0.746, grad_norm=85.901, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.018e-04, train_time=1.979
+[gpua003:0/64] 2023-07-05 22:56:20,255 (trainer:732) INFO: 14epoch:train:601-700batch: iter_time=9.906e-05, forward_time=0.105, loss_ctc=79.513, loss_att=65.577, acc=0.663, loss=69.758, backward_time=0.745, grad_norm=91.560, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.018e-04, train_time=1.978
+[gpua003:0/64] 2023-07-05 22:58:13,995 (trainer:732) INFO: 14epoch:train:701-800batch: iter_time=1.052e-04, forward_time=0.104, loss_ctc=86.261, loss_att=57.434, acc=0.687, loss=66.082, backward_time=0.756,
grad_norm=111.598, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=2.275 +[gpua003:0/64] 2023-07-05 22:59:03,798 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-05 22:59:22,487 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 22:59:25,999 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 22:59:26,000 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-05 22:59:26,006 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:04:47,145 (trainer:732) INFO: 14epoch:train:801-900batch: iter_time=1.366, forward_time=0.106, loss_ctc=79.493, loss_att=56.962, acc=0.683, loss=63.721, backward_time=0.769, grad_norm=96.755, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=7.863 +[gpua003:0/64] 2023-07-05 23:06:27,057 (trainer:732) INFO: 14epoch:train:901-1000batch: iter_time=1.071e-04, forward_time=0.107, loss_ctc=77.587, loss_att=65.112, acc=0.669, loss=68.855, backward_time=0.749, grad_norm=96.509, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=1.998 +[gpua003:0/64] 2023-07-05 23:08:06,608 (trainer:732) INFO: 14epoch:train:1001-1100batch: iter_time=1.219e-04, forward_time=0.108, loss_ctc=69.740, loss_att=52.983, acc=0.695, loss=58.010, backward_time=0.747, grad_norm=83.010, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.016e-04, train_time=1.991 +[gpua003:0/64] 2023-07-05 23:09:46,058 (trainer:732) INFO: 14epoch:train:1101-1200batch: iter_time=9.900e-05, forward_time=0.107, loss_ctc=71.265, loss_att=51.955, acc=0.681, loss=57.748, backward_time=0.747, grad_norm=86.780, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.016e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:11:25,939 (trainer:732) INFO: 14epoch:train:1201-1300batch: iter_time=1.090e-04, forward_time=0.107, loss_ctc=72.245, loss_att=58.430, acc=0.682, loss=62.574, backward_time=0.748, grad_norm=82.034, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.015e-04, train_time=1.997 +[gpua003:0/64] 2023-07-05 23:13:05,201 (trainer:732) INFO: 14epoch:train:1301-1400batch: iter_time=1.137e-04, forward_time=0.106, loss_ctc=68.613, loss_att=56.255, acc=0.673, loss=59.963, backward_time=0.746, grad_norm=90.540, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.015e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:14:44,608 (trainer:732) INFO: 14epoch:train:1401-1500batch: iter_time=1.156e-04, forward_time=0.107, loss_ctc=75.103, loss_att=62.936, acc=0.675, loss=66.586, backward_time=0.747, grad_norm=91.101, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=1.988 +[gpua003:0/64] 2023-07-05 23:16:23,886 (trainer:732) INFO: 14epoch:train:1501-1600batch: 
iter_time=1.146e-04, forward_time=0.107, loss_ctc=85.725, loss_att=59.152, acc=0.686, loss=67.124, backward_time=0.748, grad_norm=323.443, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:17:31,811 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-05 23:17:50,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:17:54,347 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:17:54,348 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-05 23:17:54,354 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:22:17,852 (trainer:732) INFO: 14epoch:train:1601-1700batch: iter_time=1.315, forward_time=0.107, loss_ctc=91.275, loss_att=63.464, acc=0.682, loss=71.807, backward_time=0.761, grad_norm=113.989, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=7.079 +[gpua003:0/64] 2023-07-05 23:23:57,809 (trainer:732) INFO: 14epoch:train:1701-1800batch: iter_time=1.095e-04, forward_time=0.106, loss_ctc=65.126, loss_att=52.331, acc=0.668, loss=56.169, backward_time=0.746, grad_norm=83.364, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.013e-04, train_time=1.999 +[gpua003:0/64] 2023-07-05 23:25:37,561 (trainer:732) INFO: 14epoch:train:1801-1900batch: iter_time=1.136e-04, forward_time=0.105, loss_ctc=79.242, loss_att=61.858, acc=0.676, loss=67.073, backward_time=0.744, grad_norm=97.701, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.013e-04, train_time=1.995 +[gpua003:0/64] 2023-07-05 23:27:17,066 (trainer:732) INFO: 14epoch:train:1901-2000batch: iter_time=1.351e-04, forward_time=0.108, loss_ctc=65.033, loss_att=46.741, acc=0.696, loss=52.229, backward_time=0.748, grad_norm=72.732, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.012e-04, train_time=1.990 +[gpua003:0/64] 2023-07-05 23:28:56,259 (trainer:732) INFO: 14epoch:train:2001-2100batch: iter_time=1.376e-04, forward_time=0.107, loss_ctc=74.297, loss_att=55.693, acc=0.679, loss=61.274, backward_time=0.747, grad_norm=81.546, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.111, optim0_lr0=1.012e-04, train_time=1.984 +[gpua003:0/64] 2023-07-05 23:30:35,698 (trainer:732) INFO: 14epoch:train:2101-2200batch: iter_time=1.153e-04, forward_time=0.107, loss_ctc=69.547, loss_att=58.660, acc=0.668, loss=61.926, backward_time=0.747, grad_norm=98.590, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.012e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:32:14,941 (trainer:732) INFO: 14epoch:train:2201-2300batch: iter_time=1.191e-04, forward_time=0.105, loss_ctc=70.991, loss_att=61.134, acc=0.665, loss=64.091, backward_time=0.745, grad_norm=88.986, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, 
optim0_lr0=1.011e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:33:54,151 (trainer:732) INFO: 14epoch:train:2301-2400batch: iter_time=1.136e-04, forward_time=0.106, loss_ctc=78.668, loss_att=55.499, acc=0.691, loss=62.450, backward_time=0.744, grad_norm=94.472, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.011e-04, train_time=1.984 +[gpua003:0/64] 2023-07-05 23:35:33,588 (trainer:732) INFO: 14epoch:train:2401-2500batch: iter_time=1.069e-04, forward_time=0.106, loss_ctc=89.577, loss_att=66.586, acc=0.668, loss=73.483, backward_time=0.746, grad_norm=115.780, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.010e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:35:35,877 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-05 23:35:54,848 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:35:58,368 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:35:58,368 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-05 23:35:58,374 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:42:19,890 (trainer:732) INFO: 14epoch:train:2501-2600batch: iter_time=1.276, forward_time=0.106, loss_ctc=65.318, loss_att=47.624, acc=0.706, loss=52.932, backward_time=0.758, grad_norm=76.861, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.010e-04, train_time=8.126 +[gpua003:0/64] 2023-07-05 23:44:00,174 (trainer:732) INFO: 14epoch:train:2601-2700batch: iter_time=1.018e-04, forward_time=0.106, loss_ctc=74.292, loss_att=60.131, acc=0.672, loss=64.379, backward_time=0.748, grad_norm=91.235, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.010e-04, train_time=2.005 +[gpua003:0/64] 2023-07-05 23:45:39,635 (trainer:732) INFO: 14epoch:train:2701-2800batch: iter_time=9.909e-05, forward_time=0.106, loss_ctc=70.460, loss_att=53.646, acc=0.696, loss=58.690, backward_time=0.747, grad_norm=76.382, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.009e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:47:18,986 (trainer:732) INFO: 14epoch:train:2801-2900batch: iter_time=9.455e-05, forward_time=0.107, loss_ctc=72.107, loss_att=52.607, acc=0.679, loss=58.457, backward_time=0.746, grad_norm=92.055, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.009e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:48:58,340 (trainer:732) INFO: 14epoch:train:2901-3000batch: iter_time=8.975e-05, forward_time=0.106, loss_ctc=71.947, loss_att=58.354, acc=0.687, loss=62.432, backward_time=0.747, grad_norm=87.978, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.008e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:50:37,583 (trainer:732) INFO: 14epoch:train:3001-3100batch: iter_time=1.174e-04, forward_time=0.106, loss_ctc=67.838, loss_att=54.060, 
acc=0.680, loss=58.194, backward_time=0.746, grad_norm=87.646, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.008e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:52:16,968 (trainer:732) INFO: 14epoch:train:3101-3200batch: iter_time=9.872e-05, forward_time=0.106, loss_ctc=78.444, loss_att=65.330, acc=0.674, loss=69.264, backward_time=0.747, grad_norm=89.372, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.007e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:53:56,336 (trainer:732) INFO: 14epoch:train:3201-3300batch: iter_time=1.046e-04, forward_time=0.106, loss_ctc=84.969, loss_att=57.606, acc=0.694, loss=65.815, backward_time=0.746, grad_norm=110.526, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.007e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:54:31,552 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-05 23:54:50,698 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:54:54,271 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:54:54,271 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-05 23:54:54,277 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:59:43,333 (trainer:732) INFO: 14epoch:train:3301-3400batch: iter_time=1.303, forward_time=0.146, loss_ctc=74.612, loss_att=55.066, acc=0.684, loss=60.930, backward_time=0.760, grad_norm=98.484, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.111, optim0_lr0=1.007e-04, train_time=6.939 +[gpua003:0/64] 2023-07-06 00:01:23,508 (trainer:732) INFO: 14epoch:train:3401-3500batch: iter_time=9.903e-05, forward_time=0.106, loss_ctc=79.257, loss_att=64.273, acc=0.665, loss=68.768, backward_time=0.747, grad_norm=97.943, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.006e-04, train_time=2.004 +[gpua003:0/64] 2023-07-06 00:03:02,774 (trainer:732) INFO: 14epoch:train:3501-3600batch: iter_time=1.067e-04, forward_time=0.105, loss_ctc=69.162, loss_att=52.606, acc=0.690, loss=57.572, backward_time=0.744, grad_norm=91.125, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.006e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:04:42,112 (trainer:732) INFO: 14epoch:train:3601-3700batch: iter_time=1.060e-04, forward_time=0.105, loss_ctc=67.528, loss_att=48.483, acc=0.692, loss=54.197, backward_time=0.746, grad_norm=77.302, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.005e-04, train_time=1.987 +[gpua003:0/64] 2023-07-06 00:06:21,444 (trainer:732) INFO: 14epoch:train:3701-3800batch: iter_time=1.056e-04, forward_time=0.105, loss_ctc=70.824, loss_att=56.721, acc=0.686, loss=60.952, backward_time=0.745, grad_norm=88.267, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.005e-04, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:08:00,672 
(trainer:732) INFO: 14epoch:train:3801-3900batch: iter_time=1.154e-04, forward_time=0.106, loss_ctc=64.923, loss_att=53.550, acc=0.673, loss=56.962, backward_time=0.746, grad_norm=87.728, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.005e-04, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:09:40,070 (trainer:732) INFO: 14epoch:train:3901-4000batch: iter_time=1.270e-04, forward_time=0.106, loss_ctc=73.750, loss_att=61.788, acc=0.673, loss=65.377, backward_time=0.746, grad_norm=99.456, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.004e-04, train_time=1.988 +[gpua003:0/64] 2023-07-06 00:11:19,231 (trainer:732) INFO: 14epoch:train:4001-4100batch: iter_time=1.074e-04, forward_time=0.105, loss_ctc=83.056, loss_att=57.768, acc=0.689, loss=65.354, backward_time=0.744, grad_norm=118.212, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.004e-04, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:12:39,831 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 00:12:59,113 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:13:02,670 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:13:02,670 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 00:13:02,687 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:16:52,797 (trainer:732) INFO: 14epoch:train:4101-4200batch: iter_time=2.223, forward_time=0.105, loss_ctc=86.387, loss_att=61.491, acc=0.682, loss=68.960, backward_time=0.756, grad_norm=109.255, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.003e-04, train_time=6.671 +[gpua003:0/64] 2023-07-06 00:18:32,838 (trainer:732) INFO: 14epoch:train:4201-4300batch: iter_time=9.583e-05, forward_time=0.105, loss_ctc=66.113, loss_att=52.338, acc=0.676, loss=56.471, backward_time=0.749, grad_norm=87.547, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.003e-04, train_time=2.001 +[gpua003:0/64] 2023-07-06 00:20:12,105 (trainer:732) INFO: 14epoch:train:4301-4400batch: iter_time=9.179e-05, forward_time=0.105, loss_ctc=74.526, loss_att=56.969, acc=0.689, loss=62.236, backward_time=0.745, grad_norm=94.528, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.003e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:21:52,782 (trainer:732) INFO: 14epoch:train:4401-4500batch: iter_time=1.001e-04, forward_time=0.105, loss_ctc=67.853, loss_att=50.469, acc=0.683, loss=55.684, backward_time=0.746, grad_norm=71.313, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.002e-04, train_time=2.013 +[gpua003:0/64] 2023-07-06 00:23:34,997 (trainer:732) INFO: 14epoch:train:4501-4600batch: iter_time=1.021e-04, forward_time=0.105, loss_ctc=70.434, loss_att=50.423, acc=0.692, loss=56.426, backward_time=0.747, grad_norm=81.773, clip=100.000, 
loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.002e-04, train_time=2.044 +[gpua003:0/64] 2023-07-06 00:25:14,224 (trainer:732) INFO: 14epoch:train:4601-4700batch: iter_time=9.233e-05, forward_time=0.104, loss_ctc=72.767, loss_att=60.956, acc=0.675, loss=64.499, backward_time=0.745, grad_norm=91.196, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.001e-04, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:26:53,508 (trainer:732) INFO: 14epoch:train:4701-4800batch: iter_time=9.383e-05, forward_time=0.105, loss_ctc=67.376, loss_att=54.770, acc=0.670, loss=58.552, backward_time=0.746, grad_norm=96.216, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.001e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:28:32,809 (trainer:732) INFO: 14epoch:train:4801-4900batch: iter_time=1.024e-04, forward_time=0.105, loss_ctc=78.619, loss_att=59.814, acc=0.681, loss=65.456, backward_time=0.745, grad_norm=92.282, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.001e-04, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:30:12,054 (trainer:732) INFO: 14epoch:train:4901-5000batch: iter_time=1.019e-04, forward_time=0.106, loss_ctc=85.585, loss_att=63.122, acc=0.681, loss=69.861, backward_time=0.744, grad_norm=103.405, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.000e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:30:14,272 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 00:30:33,230 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:30:36,750 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:30:36,750 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 00:30:36,757 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:35:36,135 (trainer:732) INFO: 14epoch:train:5001-5100batch: iter_time=1.279, forward_time=0.105, loss_ctc=64.965, loss_att=47.311, acc=0.710, loss=52.607, backward_time=0.755, grad_norm=80.711, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.998e-05, train_time=6.481 +[gpua003:0/64] 2023-07-06 00:37:16,313 (trainer:732) INFO: 14epoch:train:5101-5200batch: iter_time=1.019e-04, forward_time=0.105, loss_ctc=71.440, loss_att=58.275, acc=0.679, loss=62.225, backward_time=0.746, grad_norm=92.300, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.994e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 00:38:55,753 (trainer:732) INFO: 14epoch:train:5201-5300batch: iter_time=9.692e-05, forward_time=0.106, loss_ctc=68.311, loss_att=51.470, acc=0.704, loss=56.522, backward_time=0.746, grad_norm=109.860, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.990e-05, train_time=1.989 +[gpua003:0/64] 2023-07-06 00:40:35,087 (trainer:732) INFO: 14epoch:train:5301-5400batch: iter_time=9.289e-05, 
forward_time=0.106, loss_ctc=71.699, loss_att=52.171, acc=0.683, loss=58.029, backward_time=0.746, grad_norm=87.463, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.986e-05, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:42:14,290 (trainer:732) INFO: 14epoch:train:5401-5500batch: iter_time=1.061e-04, forward_time=0.105, loss_ctc=71.095, loss_att=58.813, acc=0.688, loss=62.498, backward_time=0.744, grad_norm=80.801, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.982e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:43:53,453 (trainer:732) INFO: 14epoch:train:5501-5600batch: iter_time=1.011e-04, forward_time=0.105, loss_ctc=65.967, loss_att=52.829, acc=0.683, loss=56.770, backward_time=0.743, grad_norm=80.073, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.978e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:45:32,694 (trainer:732) INFO: 14epoch:train:5601-5700batch: iter_time=9.394e-05, forward_time=0.105, loss_ctc=76.086, loss_att=65.998, acc=0.677, loss=69.024, backward_time=0.745, grad_norm=84.296, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.974e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:47:11,756 (trainer:732) INFO: 14epoch:train:5701-5800batch: iter_time=1.005e-04, forward_time=0.104, loss_ctc=80.981, loss_att=57.081, acc=0.692, loss=64.251, backward_time=0.745, grad_norm=117.230, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.970e-05, train_time=1.981 +[gpua003:0/64] 2023-07-06 00:47:46,785 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 00:48:05,668 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:48:09,217 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:48:09,217 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 00:48:09,223 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:51:56,399 (trainer:732) INFO: 14epoch:train:5801-5900batch: iter_time=1.268, forward_time=0.105, loss_ctc=70.851, loss_att=51.948, acc=0.699, loss=57.619, backward_time=0.758, grad_norm=95.312, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.966e-05, train_time=5.693 +[gpua003:0/64] 2023-07-06 00:53:36,326 (trainer:732) INFO: 14epoch:train:5901-6000batch: iter_time=9.412e-05, forward_time=0.105, loss_ctc=74.049, loss_att=61.596, acc=0.687, loss=65.332, backward_time=0.745, grad_norm=83.262, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.962e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 00:55:15,556 (trainer:732) INFO: 14epoch:train:6001-6100batch: iter_time=9.442e-05, forward_time=0.105, loss_ctc=72.845, loss_att=54.112, acc=0.692, loss=59.732, backward_time=0.744, grad_norm=84.620, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.958e-05, 
train_time=1.984 +[gpua003:0/64] 2023-07-06 00:56:54,727 (trainer:732) INFO: 14epoch:train:6101-6200batch: iter_time=9.749e-05, forward_time=0.106, loss_ctc=62.931, loss_att=46.045, acc=0.705, loss=51.111, backward_time=0.744, grad_norm=75.046, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.954e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:58:34,088 (trainer:732) INFO: 14epoch:train:6201-6300batch: iter_time=9.675e-05, forward_time=0.106, loss_ctc=75.966, loss_att=57.343, acc=0.689, loss=62.930, backward_time=0.745, grad_norm=93.914, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.950e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 01:00:28,991 (trainer:732) INFO: 14epoch:train:6301-6400batch: iter_time=9.513e-05, forward_time=0.105, loss_ctc=65.892, loss_att=55.960, acc=0.686, loss=58.940, backward_time=0.769, grad_norm=98.285, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.946e-05, train_time=2.298 +[gpua003:0/64] 2023-07-06 01:02:08,399 (trainer:732) INFO: 14epoch:train:6401-6500batch: iter_time=9.649e-05, forward_time=0.106, loss_ctc=71.103, loss_att=59.408, acc=0.685, loss=62.916, backward_time=0.746, grad_norm=81.143, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.942e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 01:03:51,462 (trainer:732) INFO: 14epoch:train:6501-6600batch: iter_time=1.032e-04, forward_time=0.106, loss_ctc=81.907, loss_att=58.088, acc=0.700, loss=65.234, backward_time=0.749, grad_norm=98.608, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.938e-05, train_time=2.061 +[gpua003:0/64] 2023-07-06 01:04:59,329 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 01:05:18,889 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:05:22,372 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:05:22,373 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 01:05:22,379 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:10:37,392 (trainer:732) INFO: 14epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.107, loss_ctc=81.131, loss_att=58.478, acc=0.688, loss=65.274, backward_time=0.755, grad_norm=114.707, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.935e-05, train_time=8.118 +[gpua003:0/64] 2023-07-06 01:12:18,338 (trainer:732) INFO: 14epoch:train:6701-6800batch: iter_time=1.129e-04, forward_time=0.106, loss_ctc=63.589, loss_att=52.978, acc=0.681, loss=56.162, backward_time=0.750, grad_norm=83.786, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.931e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 01:13:57,605 (trainer:732) INFO: 14epoch:train:6801-6900batch: iter_time=1.194e-04, forward_time=0.106, loss_ctc=73.216, loss_att=56.431, acc=0.697, loss=61.466, 
backward_time=0.746, grad_norm=85.113, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.927e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:15:36,756 (trainer:732) INFO: 14epoch:train:6901-7000batch: iter_time=1.192e-04, forward_time=0.106, loss_ctc=66.478, loss_att=48.895, acc=0.692, loss=54.170, backward_time=0.745, grad_norm=88.642, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.923e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 01:17:15,887 (trainer:732) INFO: 14epoch:train:7001-7100batch: iter_time=1.179e-04, forward_time=0.106, loss_ctc=70.266, loss_att=50.382, acc=0.693, loss=56.347, backward_time=0.746, grad_norm=84.071, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.919e-05, train_time=1.982 +[gpua003:0/64] 2023-07-06 01:18:54,896 (trainer:732) INFO: 14epoch:train:7101-7200batch: iter_time=1.187e-04, forward_time=0.105, loss_ctc=73.012, loss_att=61.557, acc=0.673, loss=64.994, backward_time=0.746, grad_norm=89.483, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.915e-05, train_time=1.980 +[gpua003:0/64] 2023-07-06 01:20:34,275 (trainer:732) INFO: 14epoch:train:7201-7300batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=65.363, loss_att=55.629, acc=0.669, loss=58.549, backward_time=0.746, grad_norm=89.409, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.911e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 01:22:13,430 (trainer:732) INFO: 14epoch:train:7301-7400batch: iter_time=1.061e-04, forward_time=0.105, loss_ctc=78.086, loss_att=58.693, acc=0.692, loss=64.511, backward_time=0.744, grad_norm=93.347, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.907e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 01:23:52,443 (trainer:732) INFO: 14epoch:train:7401-7500batch: iter_time=1.255e-04, forward_time=0.105, loss_ctc=85.073, loss_att=62.105, acc=0.679, loss=68.995, backward_time=0.745, grad_norm=99.476, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.903e-05, train_time=1.980 +[gpua003:0/64] 2023-07-06 01:23:53,863 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
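An aside on reading these entries: the logged loss is consistent with the usual hybrid CTC/attention objective, loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att with ctc_weight = 0.3. The weight is inferred from the logged numbers; the training config itself is not shown in this log. A quick check against two entries from this epoch:

```python
# Hedged sanity check that the logged "loss" column is a CTC/attention
# mixture; ctc_weight=0.3 is inferred from the values, not read from config.
def hybrid_loss(loss_ctc: float, loss_att: float, ctc_weight: float = 0.3) -> float:
    """loss = w * loss_ctc + (1 - w) * loss_att, the standard hybrid objective."""
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# Values from the 14epoch:train:7401-7500batch entry above:
assert abs(hybrid_loss(85.073, 62.105) - 68.995) < 5e-3
# Values from the first entry of epoch 14 (1-100batch):
assert abs(hybrid_loss(67.478, 50.061) - 55.286) < 5e-3
```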
+[gpua003:0/64] 2023-07-06 01:24:12,946 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:24:16,467 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:24:16,467 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 01:24:16,474 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:28:56,627 (trainer:732) INFO: 14epoch:train:7501-7600batch: iter_time=1.287, forward_time=0.106, loss_ctc=68.678, loss_att=50.359, acc=0.697, loss=55.855, backward_time=0.755, grad_norm=80.423, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.899e-05, train_time=6.083 +[gpua003:0/64] 2023-07-06 01:30:36,072 (trainer:732) INFO: 14epoch:train:7601-7700batch: iter_time=1.040e-04, forward_time=0.106, loss_ctc=68.907, loss_att=57.070, acc=0.685, loss=60.621, backward_time=0.745, grad_norm=91.649, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.896e-05, train_time=1.989 +[gpua003:0/64] 2023-07-06 01:32:15,336 (trainer:732) INFO: 14epoch:train:7701-7800batch: iter_time=1.071e-04, forward_time=0.106, loss_ctc=70.050, loss_att=51.852, acc=0.696, loss=57.312, backward_time=0.744, grad_norm=83.173, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.892e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:33:54,551 (trainer:732) INFO: 14epoch:train:7801-7900batch: iter_time=9.941e-05, forward_time=0.106, loss_ctc=73.006, loss_att=51.646, acc=0.691, loss=58.054, backward_time=0.745, grad_norm=86.960, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.888e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 01:35:34,182 (trainer:732) INFO: 14epoch:train:7901-8000batch: iter_time=8.641e-05, forward_time=0.107, loss_ctc=70.408, loss_att=60.834, acc=0.686, loss=63.707, backward_time=0.748, grad_norm=102.961, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.884e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 01:37:13,489 (trainer:732) INFO: 14epoch:train:8001-8100batch: iter_time=1.062e-04, forward_time=0.107, loss_ctc=66.066, loss_att=55.633, acc=0.676, loss=58.763, backward_time=0.747, grad_norm=84.748, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.880e-05, train_time=1.986 +[gpua003:0/64] 2023-07-06 01:38:55,993 (trainer:732) INFO: 14epoch:train:8101-8200batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=77.571, loss_att=61.968, acc=0.687, loss=66.649, backward_time=0.748, grad_norm=91.196, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.876e-05, train_time=2.050 +[gpua003:0/64] 2023-07-06 01:40:35,417 (trainer:732) INFO: 14epoch:train:8201-8300batch: iter_time=9.520e-05, forward_time=0.107, loss_ctc=80.442, loss_att=55.920, acc=0.694, loss=63.276, backward_time=0.747, grad_norm=82.962, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, 
optim0_lr0=9.872e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 01:41:10,610 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 01:41:29,651 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:41:33,072 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:41:33,072 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 01:41:33,078 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:46:45,702 (trainer:732) INFO: 14epoch:train:8301-8400batch: iter_time=1.290, forward_time=0.120, loss_ctc=72.247, loss_att=53.015, acc=0.698, loss=58.785, backward_time=0.761, grad_norm=82.758, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.869e-05, train_time=7.405 +[gpua003:0/64] 2023-07-06 01:48:26,447 (trainer:732) INFO: 14epoch:train:8401-8500batch: iter_time=1.170e-04, forward_time=0.105, loss_ctc=72.826, loss_att=61.567, acc=0.673, loss=64.945, backward_time=0.746, grad_norm=92.862, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.865e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 01:50:06,144 (trainer:732) INFO: 14epoch:train:8501-8600batch: iter_time=1.142e-04, forward_time=0.105, loss_ctc=71.755, loss_att=54.026, acc=0.689, loss=59.345, backward_time=0.745, grad_norm=84.483, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.861e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 01:51:45,693 (trainer:732) INFO: 14epoch:train:8601-8700batch: iter_time=1.203e-04, forward_time=0.106, loss_ctc=62.700, loss_att=45.802, acc=0.702, loss=50.872, backward_time=0.746, grad_norm=76.913, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.857e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 01:53:38,865 (trainer:732) INFO: 14epoch:train:8701-8800batch: iter_time=1.071e-04, forward_time=0.106, loss_ctc=74.653, loss_att=55.710, acc=0.688, loss=61.393, backward_time=0.773, grad_norm=129.792, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.853e-05, train_time=2.263 +[gpua003:0/64] 2023-07-06 01:55:18,104 (trainer:732) INFO: 14epoch:train:8801-8900batch: iter_time=1.105e-04, forward_time=0.106, loss_ctc=64.757, loss_att=56.246, acc=0.674, loss=58.799, backward_time=0.744, grad_norm=80.970, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.849e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:56:59,672 (trainer:732) INFO: 14epoch:train:8901-9000batch: iter_time=1.077e-04, forward_time=0.106, loss_ctc=70.334, loss_att=57.944, acc=0.678, loss=61.661, backward_time=0.751, grad_norm=83.367, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.846e-05, train_time=2.031 +[gpua003:0/64] 2023-07-06 01:58:38,903 (trainer:732) INFO: 14epoch:train:9001-9100batch: iter_time=1.082e-04, forward_time=0.106, loss_ctc=80.565, loss_att=57.072, 
acc=0.700, loss=64.119, backward_time=0.745, grad_norm=93.217, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.842e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 01:59:52,859 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 02:00:11,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 02:00:15,305 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 02:00:15,305 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 02:00:15,311 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 02:04:51,398 (trainer:732) INFO: 14epoch:train:9101-9200batch: iter_time=1.321, forward_time=0.157, loss_ctc=81.944, loss_att=58.952, acc=0.689, loss=65.850, backward_time=0.767, grad_norm=105.225, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.114, optim0_lr0=9.838e-05, train_time=7.449 +[gpua003:0/64] 2023-07-06 02:06:31,077 (trainer:732) INFO: 14epoch:train:9201-9300batch: iter_time=1.023e-04, forward_time=0.105, loss_ctc=66.026, loss_att=53.515, acc=0.692, loss=57.268, backward_time=0.747, grad_norm=86.498, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.834e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 02:08:11,806 (trainer:732) INFO: 14epoch:train:9301-9400batch: iter_time=9.862e-05, forward_time=0.106, loss_ctc=73.964, loss_att=57.337, acc=0.700, loss=62.325, backward_time=0.745, grad_norm=85.275, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.830e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 02:09:51,859 (trainer:732) INFO: 14epoch:train:9401-9500batch: iter_time=1.055e-04, forward_time=0.106, loss_ctc=64.980, loss_att=48.354, acc=0.695, loss=53.342, backward_time=0.745, grad_norm=74.030, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.827e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 02:11:31,288 (trainer:732) INFO: 14epoch:train:9501-9600batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=70.565, loss_att=51.030, acc=0.695, loss=56.890, backward_time=0.744, grad_norm=84.506, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.823e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 02:13:10,467 (trainer:732) INFO: 14epoch:train:9601-9700batch: iter_time=1.057e-04, forward_time=0.106, loss_ctc=71.513, loss_att=60.661, acc=0.691, loss=63.916, backward_time=0.745, grad_norm=87.016, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.819e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 02:14:49,595 (trainer:732) INFO: 14epoch:train:9701-9800batch: iter_time=1.059e-04, forward_time=0.106, loss_ctc=65.538, loss_att=54.445, acc=0.682, loss=57.773, backward_time=0.744, grad_norm=94.902, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.815e-05, train_time=1.982 +[gpua003:0/64] 2023-07-06 02:16:28,940 
(trainer:732) INFO: 14epoch:train:9801-9900batch: iter_time=1.015e-04, forward_time=0.107, loss_ctc=77.709, loss_att=58.250, acc=0.701, loss=64.087, backward_time=0.745, grad_norm=110.894, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.811e-05, train_time=1.987
+[gpua003:0/64] 2023-07-06 02:18:08,171 (trainer:732) INFO: 14epoch:train:9901-10000batch: iter_time=9.460e-05, forward_time=0.106, loss_ctc=82.982, loss_att=61.283, acc=0.688, loss=67.793, backward_time=0.744, grad_norm=103.022, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.808e-05, train_time=1.984
+[gpua003:0/64] 2023-07-06 02:32:00,423 (trainer:338) INFO: 14epoch results: [train] iter_time=0.165, forward_time=0.108, loss_ctc=73.204, loss_att=56.354, acc=0.685, loss=61.409, backward_time=0.748, grad_norm=93.679, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.000e-04, train_time=2.598, time=3 hours, 36 minutes and 52.71 seconds, total_count=110000, gpu_max_cached_mem_GB=34.473, [valid] loss_ctc=52.779, cer_ctc=0.299, loss_att=43.314, acc=0.648, cer=0.406, wer=0.989, loss=46.153, time=7 minutes and 15.36 seconds, total_count=11638, gpu_max_cached_mem_GB=37.768, [att_plot] time=6 minutes and 14.08 seconds, total_count=0, gpu_max_cached_mem_GB=37.768
+[gpua003:0/64] 2023-07-06 02:32:18,801 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua003:0/64] 2023-07-06 02:32:18,908 (trainer:272) INFO: 15/100epoch started. Estimated time to finish: 1 week, 6 days and 18 hours
+[gpua003:0/64] 2023-07-06 02:32:19,981 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 02:32:39,156 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 02:32:42,632 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 02:32:42,633 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 02:32:42,673 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 02:39:26,949 (trainer:732) INFO: 15epoch:train:1-100batch: iter_time=3.199, forward_time=0.158, loss_ctc=82.906, loss_att=63.268, acc=0.672, loss=69.159, backward_time=0.766, grad_norm=96.452, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.118, optim0_lr0=9.804e-05, train_time=8.551
+[gpua003:0/64] 2023-07-06 02:41:12,530 (trainer:732) INFO: 15epoch:train:101-200batch: iter_time=1.083e-04, forward_time=0.110, loss_ctc=93.463, loss_att=58.148, acc=0.684, loss=68.742, backward_time=0.761, grad_norm=99.343, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.800e-05, train_time=2.112
+[gpua003:0/64] 2023-07-06 02:42:58,616 (trainer:732) INFO: 15epoch:train:201-300batch: iter_time=1.075e-04, forward_time=0.109, loss_ctc=72.287, loss_att=52.500, acc=0.677, loss=58.436, backward_time=0.756, grad_norm=100.014, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.796e-05, train_time=2.122
+[gpua003:0/64] 2023-07-06 02:44:49,824 (trainer:732) INFO: 15epoch:train:301-400batch: iter_time=1.050e-04, forward_time=0.108, loss_ctc=74.935, loss_att=63.168, acc=0.669, loss=66.698, backward_time=0.769, grad_norm=93.180, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.793e-05, train_time=2.224
+[gpua003:0/64] 2023-07-06 02:46:40,595 (trainer:732) INFO: 15epoch:train:401-500batch: iter_time=1.069e-04, forward_time=0.109, loss_ctc=84.885, loss_att=68.005, acc=0.654, loss=73.069, backward_time=0.761, grad_norm=120.148, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.114, optim0_lr0=9.789e-05, train_time=2.215
+[gpua003:0/64] 2023-07-06 02:48:27,207 (trainer:732) INFO: 15epoch:train:501-600batch: iter_time=1.120e-04, forward_time=0.116, loss_ctc=88.432, loss_att=68.337, acc=0.676, loss=74.366, backward_time=0.760, grad_norm=91.380, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.785e-05, train_time=2.132
+[gpua003:0/64] 2023-07-06 02:50:13,816 (trainer:732) INFO: 15epoch:train:601-700batch: iter_time=1.091e-04, forward_time=0.109, loss_ctc=76.229, loss_att=54.425, acc=0.696, loss=60.966, backward_time=0.767, grad_norm=83.791, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.781e-05, train_time=2.132
+[gpua003:0/64] 2023-07-06 02:52:11,799 (trainer:732) INFO: 15epoch:train:701-800batch: iter_time=1.037e-04, forward_time=0.120, loss_ctc=79.546, loss_att=62.351, acc=0.675, loss=67.509, backward_time=0.782, grad_norm=103.551, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.778e-05, train_time=2.359
+[gpua003:0/64] 2023-07-06 02:53:02,111 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 02:53:21,354 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 02:53:24,837 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 02:53:24,837 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 02:53:24,887 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 02:58:05,256 (trainer:732) INFO: 15epoch:train:801-900batch: iter_time=1.628, forward_time=0.133, loss_ctc=82.093, loss_att=59.999, acc=0.678, loss=66.627, backward_time=0.769, grad_norm=104.738, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.774e-05, train_time=7.069
+[gpua003:0/64] 2023-07-06 02:59:46,184 (trainer:732) INFO: 15epoch:train:901-1000batch: iter_time=1.123e-04, forward_time=0.108, loss_ctc=77.407, loss_att=56.196, acc=0.685, loss=62.559, backward_time=0.752, grad_norm=95.582, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.770e-05, train_time=2.018
+[gpua003:0/64] 2023-07-06 03:01:26,322 (trainer:732) INFO: 15epoch:train:1001-1100batch: iter_time=9.835e-05, forward_time=0.107, loss_ctc=85.077, loss_att=54.745, acc=0.695, loss=63.845, backward_time=0.755, grad_norm=87.655, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.766e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 03:03:06,138 (trainer:732) INFO: 15epoch:train:1101-1200batch: iter_time=8.910e-05, forward_time=0.106, loss_ctc=71.804, loss_att=57.490, acc=0.664, loss=61.784, backward_time=0.752, grad_norm=96.520, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.763e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 03:04:46,029 (trainer:732) INFO: 15epoch:train:1201-1300batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=78.767, loss_att=64.261, acc=0.679, loss=68.613, backward_time=0.752, grad_norm=95.553, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.759e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 03:06:25,781 (trainer:732) INFO: 15epoch:train:1301-1400batch: iter_time=9.587e-05, forward_time=0.106, loss_ctc=89.123, loss_att=69.314, acc=0.678, loss=75.257, backward_time=0.752, grad_norm=99.925, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.755e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 03:08:05,655 (trainer:732) INFO: 15epoch:train:1401-1500batch: iter_time=9.323e-05, forward_time=0.106, loss_ctc=77.240, loss_att=56.417, acc=0.693, loss=62.664, backward_time=0.752, grad_norm=92.784, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.751e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 03:09:45,213 (trainer:732) INFO: 15epoch:train:1501-1600batch: iter_time=9.074e-05, forward_time=0.105, loss_ctc=76.823, loss_att=57.859, acc=0.677, loss=63.548, backward_time=0.751, grad_norm=99.313, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.748e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 03:10:52,700 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 03:11:11,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 03:11:15,554 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 03:11:15,554 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 03:11:15,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 03:16:19,134 (trainer:732) INFO: 15epoch:train:1601-1700batch: iter_time=1.281, forward_time=0.107, loss_ctc=78.684, loss_att=60.055, acc=0.678, loss=65.643, backward_time=0.768, grad_norm=81.939, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.744e-05, train_time=7.878
+[gpua003:0/64] 2023-07-06 03:18:00,308 (trainer:732) INFO: 15epoch:train:1701-1800batch: iter_time=9.922e-05, forward_time=0.107, loss_ctc=79.008, loss_att=56.298, acc=0.696, loss=63.111, backward_time=0.753, grad_norm=81.521, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.740e-05, train_time=2.023
+[gpua003:0/64] 2023-07-06 03:19:40,539 (trainer:732) INFO: 15epoch:train:1801-1900batch: iter_time=9.836e-05, forward_time=0.108, loss_ctc=90.709, loss_att=57.184, acc=0.688, loss=67.242, backward_time=0.754, grad_norm=97.794, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.737e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 03:21:20,631 (trainer:732) INFO: 15epoch:train:1901-2000batch: iter_time=9.980e-05, forward_time=0.107, loss_ctc=71.072, loss_att=53.719, acc=0.682, loss=58.925, backward_time=0.751, grad_norm=83.439, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.733e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 03:23:00,483 (trainer:732) INFO: 15epoch:train:2001-2100batch: iter_time=9.705e-05, forward_time=0.107, loss_ctc=74.810, loss_att=63.359, acc=0.675, loss=66.794, backward_time=0.751, grad_norm=91.749, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.729e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 03:24:40,225 (trainer:732) INFO: 15epoch:train:2101-2200batch: iter_time=9.846e-05, forward_time=0.107, loss_ctc=86.087, loss_att=63.950, acc=0.671, loss=70.591, backward_time=0.751, grad_norm=104.916, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.726e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 03:26:19,997 (trainer:732) INFO: 15epoch:train:2201-2300batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=81.168, loss_att=65.843, acc=0.683, loss=70.440, backward_time=0.751, grad_norm=85.546, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.722e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 03:28:19,638 (trainer:732) INFO: 15epoch:train:2301-2400batch: iter_time=9.845e-05, forward_time=0.106, loss_ctc=75.986, loss_att=55.773, acc=0.686, loss=61.837, backward_time=0.780, grad_norm=89.935, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.718e-05, train_time=2.393
+[gpua003:0/64] 2023-07-06 03:30:11,150 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 03:30:30,084 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 03:30:33,644 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 03:30:33,644 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 03:30:33,650 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 03:33:45,898 (trainer:732) INFO: 15epoch:train:2401-2500batch: iter_time=1.301, forward_time=0.142, loss_ctc=78.818, loss_att=56.765, acc=0.693, loss=63.381, backward_time=0.782, grad_norm=91.105, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.715e-05, train_time=6.525
+[gpua003:0/64] 2023-07-06 03:35:27,576 (trainer:732) INFO: 15epoch:train:2501-2600batch: iter_time=1.159e-04, forward_time=0.110, loss_ctc=80.164, loss_att=60.607, acc=0.691, loss=66.474, backward_time=0.759, grad_norm=88.751, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.711e-05, train_time=2.034
+[gpua003:0/64] 2023-07-06 03:37:07,711 (trainer:732) INFO: 15epoch:train:2601-2700batch: iter_time=1.071e-04, forward_time=0.108, loss_ctc=89.216, loss_att=56.175, acc=0.699, loss=66.087, backward_time=0.752, grad_norm=94.279, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.707e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 03:38:47,535 (trainer:732) INFO: 15epoch:train:2701-2800batch: iter_time=1.003e-04, forward_time=0.107, loss_ctc=71.669, loss_att=52.232, acc=0.691, loss=58.063, backward_time=0.752, grad_norm=78.637, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.704e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 03:40:27,453 (trainer:732) INFO: 15epoch:train:2801-2900batch: iter_time=9.136e-05, forward_time=0.108, loss_ctc=72.118, loss_att=61.501, acc=0.688, loss=64.686, backward_time=0.752, grad_norm=86.278, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.700e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 03:42:07,650 (trainer:732) INFO: 15epoch:train:2901-3000batch: iter_time=9.709e-05, forward_time=0.108, loss_ctc=83.002, loss_att=63.713, acc=0.675, loss=69.500, backward_time=0.754, grad_norm=94.808, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.696e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 03:43:47,676 (trainer:732) INFO: 15epoch:train:3001-3100batch: iter_time=9.954e-05, forward_time=0.108, loss_ctc=85.122, loss_att=64.702, acc=0.694, loss=70.828, backward_time=0.754, grad_norm=92.367, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.693e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 03:45:30,195 (trainer:732) INFO: 15epoch:train:3101-3200batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=74.716, loss_att=52.707, acc=0.702, loss=59.310, backward_time=0.753, grad_norm=81.698, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.689e-05, train_time=2.050
+[gpua003:0/64] 2023-07-06 03:47:13,089 (trainer:732) INFO: 15epoch:train:3201-3300batch: iter_time=9.444e-05, forward_time=0.108, loss_ctc=77.867, loss_att=61.783, acc=0.691, loss=66.608, backward_time=0.759, grad_norm=98.749, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.685e-05, train_time=2.058
+[gpua003:0/64] 2023-07-06 03:47:53,832 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 03:48:12,960 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 03:48:16,513 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 03:48:16,514 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 03:48:16,520 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 03:53:07,778 (trainer:732) INFO: 15epoch:train:3301-3400batch: iter_time=1.891, forward_time=0.108, loss_ctc=80.670, loss_att=59.551, acc=0.693, loss=65.887, backward_time=0.768, grad_norm=91.581, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.682e-05, train_time=7.094
+[gpua003:0/64] 2023-07-06 03:54:48,045 (trainer:732) INFO: 15epoch:train:3401-3500batch: iter_time=1.099e-04, forward_time=0.109, loss_ctc=76.671, loss_att=54.588, acc=0.697, loss=61.213, backward_time=0.753, grad_norm=86.293, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.678e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 03:56:29,514 (trainer:732) INFO: 15epoch:train:3501-3600batch: iter_time=9.121e-05, forward_time=0.108, loss_ctc=83.637, loss_att=55.309, acc=0.702, loss=63.808, backward_time=0.753, grad_norm=89.277, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.674e-05, train_time=2.029
+[gpua003:0/64] 2023-07-06 03:58:09,348 (trainer:732) INFO: 15epoch:train:3601-3700batch: iter_time=9.560e-05, forward_time=0.108, loss_ctc=70.720, loss_att=55.269, acc=0.679, loss=59.904, backward_time=0.751, grad_norm=89.133, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.671e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 03:59:49,035 (trainer:732) INFO: 15epoch:train:3701-3800batch: iter_time=9.637e-05, forward_time=0.107, loss_ctc=77.221, loss_att=62.310, acc=0.690, loss=66.783, backward_time=0.751, grad_norm=96.452, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.667e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 04:01:31,489 (trainer:732) INFO: 15epoch:train:3801-3900batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=86.594, loss_att=65.894, acc=0.691, loss=72.104, backward_time=0.753, grad_norm=92.320, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.664e-05, train_time=2.049
+[gpua003:0/64] 2023-07-06 04:03:11,208 (trainer:732) INFO: 15epoch:train:3901-4000batch: iter_time=9.501e-05, forward_time=0.107, loss_ctc=78.169, loss_att=56.640, acc=0.693, loss=63.098, backward_time=0.750, grad_norm=89.150, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.660e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 04:04:51,122 (trainer:732) INFO: 15epoch:train:4001-4100batch: iter_time=9.624e-05, forward_time=0.107, loss_ctc=76.858, loss_att=56.884, acc=0.686, loss=62.876, backward_time=0.751, grad_norm=103.211, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.656e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 04:05:57,372 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 04:06:16,333 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 04:06:19,864 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 04:06:19,864 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-06 04:06:19,870 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 04:09:19,124 (trainer:732) INFO: 15epoch:train:4101-4200batch: iter_time=1.298, forward_time=0.107, loss_ctc=76.656, loss_att=57.378, acc=0.699, loss=63.161, backward_time=0.761, grad_norm=84.164, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.653e-05, train_time=5.360
+[gpua003:0/64] 2023-07-06 04:10:59,509 (trainer:732) INFO: 15epoch:train:4201-4300batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=77.716, loss_att=58.424, acc=0.694, loss=64.212, backward_time=0.754, grad_norm=86.815, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.649e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 04:12:39,763 (trainer:732) INFO: 15epoch:train:4301-4400batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=86.970, loss_att=57.258, acc=0.694, loss=66.171, backward_time=0.754, grad_norm=89.527, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.646e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 04:14:19,772 (trainer:732) INFO: 15epoch:train:4401-4500batch: iter_time=8.559e-05, forward_time=0.108, loss_ctc=70.095, loss_att=53.690, acc=0.681, loss=58.611, backward_time=0.754, grad_norm=78.614, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.642e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 04:15:59,517 (trainer:732) INFO: 15epoch:train:4501-4600batch: iter_time=9.013e-05, forward_time=0.107, loss_ctc=74.563, loss_att=61.596, acc=0.680, loss=65.486, backward_time=0.751, grad_norm=89.324, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.638e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 04:17:39,351 (trainer:732) INFO: 15epoch:train:4601-4700batch: iter_time=9.033e-05, forward_time=0.108, loss_ctc=83.925, loss_att=60.152, acc=0.693, loss=67.284, backward_time=0.752, grad_norm=90.456, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.635e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 04:19:19,250 (trainer:732) INFO: 15epoch:train:4701-4800batch: iter_time=9.522e-05, forward_time=0.108, loss_ctc=78.761, loss_att=63.200, acc=0.690, loss=67.868, backward_time=0.753, grad_norm=86.126, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.631e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 04:20:59,100 (trainer:732) INFO: 15epoch:train:4801-4900batch: iter_time=9.812e-05, forward_time=0.107, loss_ctc=74.851, loss_att=55.680, acc=0.694, loss=61.431, backward_time=0.752, grad_norm=103.100, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.628e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 04:22:39,137 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 04:22:58,402 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 04:23:01,981 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 04:23:01,981 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 04:23:01,987 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 04:26:15,035 (trainer:732) INFO: 15epoch:train:4901-5000batch: iter_time=1.298, forward_time=0.107, loss_ctc=79.639, loss_att=56.728, acc=0.693, loss=63.601, backward_time=0.757, grad_norm=91.688, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.624e-05, train_time=6.318
+[gpua003:0/64] 2023-07-06 04:27:57,014 (trainer:732) INFO: 15epoch:train:5001-5100batch: iter_time=1.188e-04, forward_time=0.110, loss_ctc=79.481, loss_att=59.825, acc=0.685, loss=65.722, backward_time=0.758, grad_norm=89.848, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.621e-05, train_time=2.039
+[gpua003:0/64] 2023-07-06 04:29:37,013 (trainer:732) INFO: 15epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=86.315, loss_att=56.327, acc=0.690, loss=65.324, backward_time=0.752, grad_norm=105.057, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.617e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 04:31:16,972 (trainer:732) INFO: 15epoch:train:5201-5300batch: iter_time=1.038e-04, forward_time=0.106, loss_ctc=70.861, loss_att=50.758, acc=0.689, loss=56.789, backward_time=0.752, grad_norm=87.606, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.614e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 04:32:56,687 (trainer:732) INFO: 15epoch:train:5301-5400batch: iter_time=9.220e-05, forward_time=0.106, loss_ctc=71.546, loss_att=60.197, acc=0.685, loss=63.601, backward_time=0.750, grad_norm=91.661, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.610e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 04:34:36,595 (trainer:732) INFO: 15epoch:train:5401-5500batch: iter_time=9.018e-05, forward_time=0.107, loss_ctc=82.998, loss_att=65.769, acc=0.666, loss=70.938, backward_time=0.752, grad_norm=109.581, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.606e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 04:36:16,255 (trainer:732) INFO: 15epoch:train:5501-5600batch: iter_time=9.943e-05, forward_time=0.107, loss_ctc=82.860, loss_att=64.257, acc=0.691, loss=69.838, backward_time=0.750, grad_norm=91.728, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.603e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 04:37:55,929 (trainer:732) INFO: 15epoch:train:5601-5700batch: iter_time=9.638e-05, forward_time=0.106, loss_ctc=74.731, loss_att=53.015, acc=0.702, loss=59.530, backward_time=0.751, grad_norm=90.537, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.599e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 04:39:35,714 (trainer:732) INFO: 15epoch:train:5701-5800batch: iter_time=9.332e-05, forward_time=0.107, loss_ctc=76.746, loss_att=60.403, acc=0.686, loss=65.306, backward_time=0.751, grad_norm=100.897, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.596e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 04:40:08,943 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-06 04:40:28,479 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 04:40:32,043 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 04:40:32,043 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-06 04:40:32,049 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 04:44:18,911 (trainer:732) INFO: 15epoch:train:5801-5900batch: iter_time=1.331, forward_time=0.108, loss_ctc=75.757, loss_att=54.746, acc=0.692, loss=61.049, backward_time=0.767, grad_norm=90.029, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.592e-05, train_time=5.664
+[gpua003:0/64] 2023-07-06 04:45:59,455 (trainer:732) INFO: 15epoch:train:5901-6000batch: iter_time=1.005e-04, forward_time=0.107, loss_ctc=80.541, loss_att=59.034, acc=0.691, loss=65.486, backward_time=0.753, grad_norm=101.034, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.589e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 04:47:39,684 (trainer:732) INFO: 15epoch:train:6001-6100batch: iter_time=9.840e-05, forward_time=0.107, loss_ctc=77.216, loss_att=50.560, acc=0.689, loss=58.557, backward_time=0.750, grad_norm=91.962, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.585e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 04:49:19,460 (trainer:732) INFO: 15epoch:train:6101-6200batch: iter_time=9.863e-05, forward_time=0.108, loss_ctc=70.142, loss_att=56.782, acc=0.679, loss=60.790, backward_time=0.751, grad_norm=87.925, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.582e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 04:50:59,293 (trainer:732) INFO: 15epoch:train:6201-6300batch: iter_time=9.683e-05, forward_time=0.107, loss_ctc=84.691, loss_att=67.527, acc=0.672, loss=72.676, backward_time=0.751, grad_norm=96.501, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.578e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 04:52:39,001 (trainer:732) INFO: 15epoch:train:6301-6400batch: iter_time=9.518e-05, forward_time=0.108, loss_ctc=78.908, loss_att=59.469, acc=0.696, loss=65.301, backward_time=0.751, grad_norm=90.460, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.575e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 04:54:18,553 (trainer:732) INFO: 15epoch:train:6401-6500batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=79.232, loss_att=59.510, acc=0.686, loss=65.427, backward_time=0.749, grad_norm=104.175, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.571e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 04:55:58,195 (trainer:732) INFO: 15epoch:train:6501-6600batch: iter_time=9.919e-05, forward_time=0.108, loss_ctc=74.355, loss_att=57.326, acc=0.685, loss=62.435, backward_time=0.750, grad_norm=89.442, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.568e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 04:57:05,281 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-06 04:57:24,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 04:57:27,875 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 04:57:27,875 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-06 04:57:27,881 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 05:03:01,395 (trainer:732) INFO: 15epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.108, loss_ctc=78.273, loss_att=57.077, acc=0.685, loss=63.436, backward_time=0.759, grad_norm=85.784, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.564e-05, train_time=8.464
+[gpua003:0/64] 2023-07-06 05:04:42,396 (trainer:732) INFO: 15epoch:train:6701-6800batch: iter_time=1.146e-04, forward_time=0.110, loss_ctc=75.552, loss_att=54.344, acc=0.708, loss=60.707, backward_time=0.754, grad_norm=87.661, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.561e-05, train_time=2.020
+[gpua003:0/64] 2023-07-06 05:06:22,783 (trainer:732) INFO: 15epoch:train:6801-6900batch: iter_time=1.151e-04, forward_time=0.109, loss_ctc=87.760, loss_att=56.616, acc=0.702, loss=65.959, backward_time=0.754, grad_norm=109.179, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.557e-05, train_time=2.008
+[gpua003:0/64] 2023-07-06 05:08:02,835 (trainer:732) INFO: 15epoch:train:6901-7000batch: iter_time=1.119e-04, forward_time=0.110, loss_ctc=68.674, loss_att=52.588, acc=0.694, loss=57.414, backward_time=0.753, grad_norm=86.799, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.554e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 05:09:42,653 (trainer:732) INFO: 15epoch:train:7001-7100batch: iter_time=1.118e-04, forward_time=0.109, loss_ctc=73.621, loss_att=61.316, acc=0.689, loss=65.008, backward_time=0.752, grad_norm=105.693, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.550e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 05:11:22,495 (trainer:732) INFO: 15epoch:train:7101-7200batch: iter_time=1.103e-04, forward_time=0.109, loss_ctc=83.133, loss_att=61.505, acc=0.686, loss=67.994, backward_time=0.752, grad_norm=104.402, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.547e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 05:13:02,248 (trainer:732) INFO: 15epoch:train:7201-7300batch: iter_time=1.110e-04, forward_time=0.109, loss_ctc=81.010, loss_att=64.819, acc=0.692, loss=69.677, backward_time=0.751, grad_norm=110.286, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.543e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 05:14:42,319 (trainer:732) INFO: 15epoch:train:7301-7400batch: iter_time=1.107e-04, forward_time=0.109, loss_ctc=72.689, loss_att=54.067, acc=0.697, loss=59.654, backward_time=0.752, grad_norm=85.938, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.540e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 05:16:26,124 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua003:0/64] 2023-07-06 05:16:45,225 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 05:16:48,776 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 05:16:48,777 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 05:16:48,783 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 05:21:21,795 (trainer:732) INFO: 15epoch:train:7401-7500batch: iter_time=1.282, forward_time=0.110, loss_ctc=75.686, loss_att=56.849, acc=0.702, loss=62.500, backward_time=0.778, grad_norm=88.301, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.536e-05, train_time=7.989
+[gpua003:0/64] 2023-07-06 05:23:04,740 (trainer:732) INFO: 15epoch:train:7501-7600batch: iter_time=1.225e-04, forward_time=0.109, loss_ctc=74.838, loss_att=55.781, acc=0.699, loss=61.498, backward_time=0.759, grad_norm=93.290, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.533e-05, train_time=2.059
+[gpua003:0/64] 2023-07-06 05:24:45,328 (trainer:732) INFO: 15epoch:train:7601-7700batch: iter_time=1.022e-04, forward_time=0.110, loss_ctc=83.499, loss_att=54.322, acc=0.699, loss=63.075, backward_time=0.754, grad_norm=90.328, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.529e-05, train_time=2.012
+[gpua003:0/64] 2023-07-06 05:26:25,161 (trainer:732) INFO: 15epoch:train:7701-7800batch: iter_time=1.134e-04, forward_time=0.109, loss_ctc=71.384, loss_att=51.519, acc=0.703, loss=57.479, backward_time=0.751, grad_norm=86.506, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.526e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 05:28:04,981 (trainer:732) INFO: 15epoch:train:7801-7900batch: iter_time=1.131e-04, forward_time=0.109, loss_ctc=73.904, loss_att=64.610, acc=0.679, loss=67.398, backward_time=0.751, grad_norm=102.632, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.522e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 05:29:44,880 (trainer:732) INFO: 15epoch:train:7901-8000batch: iter_time=1.167e-04, forward_time=0.109, loss_ctc=83.085, loss_att=62.060, acc=0.679, loss=68.368, backward_time=0.753, grad_norm=103.977, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.519e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 05:31:24,806 (trainer:732) INFO: 15epoch:train:8001-8100batch: iter_time=1.085e-04, forward_time=0.109, loss_ctc=80.413, loss_att=61.210, acc=0.698, loss=66.971, backward_time=0.752, grad_norm=96.822, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.516e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 05:33:06,003 (trainer:732) INFO: 15epoch:train:8101-8200batch: iter_time=9.574e-05, forward_time=0.109, loss_ctc=75.570, loss_att=55.287, acc=0.694, loss=61.372, backward_time=0.753, grad_norm=88.815, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.512e-05, train_time=2.024
+[gpua003:0/64] 2023-07-06 05:34:48,433 (trainer:732) INFO: 15epoch:train:8201-8300batch: iter_time=1.065e-04, forward_time=0.110, loss_ctc=73.115, loss_att=57.381, acc=0.707, loss=62.101, backward_time=0.755, grad_norm=84.564, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.509e-05, train_time=2.048
+[gpua003:0/64] 2023-07-06 05:35:22,627 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-06 05:35:42,157 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 05:35:45,995 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 05:35:45,995 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 05:35:46,001 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 05:40:45,752 (trainer:732) INFO: 15epoch:train:8301-8400batch: iter_time=1.326, forward_time=0.109, loss_ctc=77.487, loss_att=56.217, acc=0.681, loss=62.598, backward_time=0.787, grad_norm=82.284, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.505e-05, train_time=7.146
+[gpua003:0/64] 2023-07-06 05:42:30,027 (trainer:732) INFO: 15epoch:train:8401-8500batch: iter_time=1.096e-04, forward_time=0.109, loss_ctc=81.840, loss_att=60.268, acc=0.694, loss=66.740, backward_time=0.763, grad_norm=98.195, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.502e-05, train_time=2.085
+[gpua003:0/64] 2023-07-06 05:44:15,543 (trainer:732) INFO: 15epoch:train:8501-8600batch: iter_time=1.047e-04, forward_time=0.108, loss_ctc=78.760, loss_att=51.979, acc=0.691, loss=60.013, backward_time=0.768, grad_norm=92.124, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.498e-05, train_time=2.110
+[gpua003:0/64] 2023-07-06 05:46:05,064 (trainer:732) INFO: 15epoch:train:8601-8700batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=70.996, loss_att=57.564, acc=0.676, loss=61.593, backward_time=0.777, grad_norm=91.103, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.495e-05, train_time=2.190
+[gpua003:0/64] 2023-07-06 05:47:51,510 (trainer:732) INFO: 15epoch:train:8701-8800batch: iter_time=1.110e-04, forward_time=0.108, loss_ctc=80.422, loss_att=66.412, acc=0.672, loss=70.615, backward_time=0.757, grad_norm=94.400, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.491e-05, train_time=2.129
+[gpua003:0/64] 2023-07-06 05:49:31,215 (trainer:732) INFO: 15epoch:train:8801-8900batch: iter_time=1.153e-04, forward_time=0.108, loss_ctc=78.907, loss_att=60.674, acc=0.693, loss=66.144, backward_time=0.750, grad_norm=89.498, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.488e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 05:51:11,993 (trainer:732) INFO: 15epoch:train:8901-9000batch: iter_time=1.134e-04, forward_time=0.108, loss_ctc=76.299, loss_att=56.694, acc=0.694, loss=62.575, backward_time=0.751, grad_norm=82.090, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.485e-05, train_time=2.015
+[gpua003:0/64] 2023-07-06 05:52:52,430 (trainer:732) INFO: 15epoch:train:9001-9100batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=74.360, loss_att=57.538, acc=0.689, loss=62.585, backward_time=0.750, grad_norm=87.996, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.481e-05, train_time=2.009
+[gpua003:0/64] 2023-07-06 05:54:01,666 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-06 05:54:20,653 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 05:54:24,452 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 05:54:24,452 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-06 05:54:24,459 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 05:58:56,052 (trainer:732) INFO: 15epoch:train:9101-9200batch: iter_time=1.330, forward_time=0.135, loss_ctc=76.652, loss_att=55.319, acc=0.689, loss=61.719, backward_time=0.760, grad_norm=94.798, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.478e-05, train_time=7.272
+[gpua003:0/64] 2023-07-06 06:00:39,277 (trainer:732) INFO: 15epoch:train:9201-9300batch: iter_time=9.552e-05, forward_time=0.111, loss_ctc=77.373, loss_att=56.510, acc=0.706, loss=62.769, backward_time=0.757, grad_norm=98.925, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.474e-05, train_time=2.064
+[gpua003:0/64] 2023-07-06 06:02:26,540 (trainer:732) INFO: 15epoch:train:9301-9400batch: iter_time=1.017e-04, forward_time=0.109, loss_ctc=86.609, loss_att=55.256, acc=0.704, loss=64.662, backward_time=0.763, grad_norm=94.931, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.471e-05, train_time=2.145
+[gpua003:0/64] 2023-07-06 06:04:09,380 (trainer:732) INFO: 15epoch:train:9401-9500batch: iter_time=1.045e-04, forward_time=0.109, loss_ctc=69.832, loss_att=52.386, acc=0.693, loss=57.620, backward_time=0.753, grad_norm=89.310, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.468e-05, train_time=2.057
+[gpua003:0/64] 2023-07-06 06:05:51,252 (trainer:732) INFO: 15epoch:train:9501-9600batch: iter_time=1.037e-04, forward_time=0.109, loss_ctc=73.533, loss_att=62.841, acc=0.688, loss=66.049, backward_time=0.751, grad_norm=93.939, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.464e-05, train_time=2.037
+[gpua003:0/64] 2023-07-06 06:07:41,819 (trainer:732) INFO: 15epoch:train:9601-9700batch: iter_time=1.076e-04, forward_time=0.108, loss_ctc=83.150, loss_att=60.639, acc=0.687, loss=67.393, backward_time=0.774, grad_norm=94.514, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.461e-05, train_time=2.211
+[gpua003:0/64] 2023-07-06 06:09:23,340 (trainer:732) INFO: 15epoch:train:9701-9800batch: iter_time=1.286e-04, forward_time=0.108, loss_ctc=81.189, loss_att=65.286, acc=0.694, loss=70.057, backward_time=0.752, grad_norm=92.569, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.457e-05, train_time=2.030
+[gpua003:0/64] 2023-07-06 06:11:03,236 (trainer:732) INFO: 15epoch:train:9801-9900batch: iter_time=1.122e-04, forward_time=0.107, loss_ctc=72.362, loss_att=53.624, acc=0.694, loss=59.246, backward_time=0.751, grad_norm=87.692, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.111, optim0_lr0=9.454e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 06:12:42,895 (trainer:732) INFO: 15epoch:train:9901-10000batch: iter_time=1.061e-04, forward_time=0.107, loss_ctc=75.625, loss_att=55.477, acc=0.704, loss=61.522, backward_time=0.750, grad_norm=86.254, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.451e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 06:24:28,681 (trainer:338) INFO: 15epoch results: [train] iter_time=0.185, forward_time=0.110, loss_ctc=78.489, loss_att=58.649, acc=0.688, loss=64.601, backward_time=0.756, grad_norm=93.007, clip=100.000, loss_scale=7.318e+14, optim_step_time=0.112, optim0_lr0=9.624e-05, train_time=2.644, time=3 hours, 40 minutes and 32.98 seconds, total_count=120000, gpu_max_cached_mem_GB=37.770, [valid] loss_ctc=52.634, cer_ctc=0.298, loss_att=43.555, acc=0.657, cer=0.377, wer=0.991, loss=46.279, time=5 minutes and 28.62 seconds, total_count=12650, gpu_max_cached_mem_GB=37.770, [att_plot] time=6 minutes and 8.01 seconds, total_count=0, gpu_max_cached_mem_GB=37.770
+[gpua003:0/64] 2023-07-06 06:24:45,984 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua003:0/64] 2023-07-06 06:24:45,990 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/9epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/10epoch.pth
+[gpua003:0/64] 2023-07-06 06:24:46,016 (trainer:272) INFO: 16/100epoch started. Estimated time to finish: 1 week, 6 days and 16 hours
+[gpua003:0/64] 2023-07-06 06:24:46,836 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 06:25:05,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 06:25:10,550 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 06:25:10,550 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 06:25:10,617 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 06:31:07,295 (trainer:732) INFO: 16epoch:train:1-100batch: iter_time=2.752, forward_time=0.134, loss_ctc=75.117, loss_att=48.716, acc=0.689, loss=56.636, backward_time=0.771, grad_norm=101.201, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.116, optim0_lr0=9.447e-05, train_time=7.615
+[gpua003:0/64] 2023-07-06 06:32:48,088 (trainer:732) INFO: 16epoch:train:101-200batch: iter_time=1.138e-04, forward_time=0.108, loss_ctc=74.028, loss_att=52.520, acc=0.697, loss=58.972, backward_time=0.754, grad_norm=94.459, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.444e-05, train_time=2.019
+[gpua003:0/64] 2023-07-06 06:34:29,724 (trainer:732) INFO: 16epoch:train:201-300batch: iter_time=1.056e-04, forward_time=0.109, loss_ctc=64.990, loss_att=51.033, acc=0.694, loss=55.220, backward_time=0.752, grad_norm=82.414, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.441e-05, train_time=2.033
+[gpua003:0/64] 2023-07-06 06:36:17,878 (trainer:732) INFO: 16epoch:train:301-400batch: iter_time=1.070e-04, forward_time=0.108, loss_ctc=76.120, loss_att=59.374, acc=0.683, loss=64.398, backward_time=0.760, grad_norm=93.942, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.437e-05, train_time=2.163
+[gpua003:0/64] 2023-07-06 06:38:00,826 (trainer:732) INFO: 16epoch:train:401-500batch: iter_time=1.101e-04, forward_time=0.107, loss_ctc=67.187, loss_att=49.787, acc=0.695, loss=55.007, backward_time=0.754, grad_norm=82.026, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.434e-05, train_time=2.059
+[gpua003:0/64] 2023-07-06 06:39:53,617 (trainer:732) INFO: 16epoch:train:501-600batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=81.351, loss_att=59.096, acc=0.689, loss=65.772, backward_time=0.772, grad_norm=90.956, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.431e-05, train_time=2.256
+[gpua003:0/64] 2023-07-06 06:41:43,234 (trainer:732) INFO: 16epoch:train:601-700batch: iter_time=9.752e-05, forward_time=0.108, loss_ctc=87.613, loss_att=64.737, acc=0.688, loss=71.600, backward_time=0.776, grad_norm=130.042, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.427e-05, train_time=2.192
+[gpua003:0/64] 2023-07-06 06:43:33,517 (trainer:732) INFO: 16epoch:train:701-800batch: iter_time=1.003e-04, forward_time=0.109, loss_ctc=72.526, loss_att=59.461, acc=0.682, loss=63.381, backward_time=0.768, grad_norm=92.190, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.424e-05, train_time=2.205
+[gpua003:0/64] 2023-07-06 06:44:14,263 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 06:44:33,081 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 06:44:36,840 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 06:44:36,840 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 06:44:36,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 06:48:21,733 (trainer:732) INFO: 16epoch:train:801-900batch: iter_time=1.321, forward_time=0.107, loss_ctc=80.667, loss_att=58.981, acc=0.688, loss=65.487, backward_time=0.768, grad_norm=96.967, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.420e-05, train_time=5.764
+[gpua003:0/64] 2023-07-06 06:50:06,977 (trainer:732) INFO: 16epoch:train:901-1000batch: iter_time=9.995e-05, forward_time=0.107, loss_ctc=71.004, loss_att=50.281, acc=0.699, loss=56.498, backward_time=0.758, grad_norm=96.342, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.417e-05, train_time=2.105
+[gpua003:0/64] 2023-07-06 06:51:47,247 (trainer:732) INFO: 16epoch:train:1001-1100batch: iter_time=1.007e-04, forward_time=0.107, loss_ctc=62.698, loss_att=47.891, acc=0.697, loss=52.333, backward_time=0.752, grad_norm=74.518, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.414e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 06:53:27,011 (trainer:732) INFO: 16epoch:train:1101-1200batch: iter_time=1.048e-04, forward_time=0.108, loss_ctc=75.410, loss_att=57.557, acc=0.687, loss=62.913, backward_time=0.751, grad_norm=83.906, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.410e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 06:55:06,788 (trainer:732) INFO: 16epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.107, loss_ctc=64.005, loss_att=47.055, acc=0.700, loss=52.140, backward_time=0.751, grad_norm=79.784, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.407e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 06:56:46,657 (trainer:732) INFO: 16epoch:train:1301-1400batch: iter_time=9.661e-05, forward_time=0.107, loss_ctc=81.759, loss_att=59.701, acc=0.678, loss=66.319, backward_time=0.751, grad_norm=92.222, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.404e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 06:58:26,430 (trainer:732) INFO: 16epoch:train:1401-1500batch: iter_time=9.571e-05, forward_time=0.107, loss_ctc=82.212, loss_att=60.520, acc=0.699, loss=67.027, backward_time=0.752, grad_norm=85.514, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.400e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:00:06,377 (trainer:732) INFO: 16epoch:train:1501-1600batch: iter_time=9.767e-05, forward_time=0.108, loss_ctc=70.485, loss_att=56.946, acc=0.687, loss=61.008, backward_time=0.752, grad_norm=93.511, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.397e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 07:01:14,601 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 07:01:33,950 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 07:01:37,876 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 07:01:37,876 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 07:01:37,882 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 07:05:32,440 (trainer:732) INFO: 16epoch:train:1601-1700batch: iter_time=1.290, forward_time=0.108, loss_ctc=76.855, loss_att=60.487, acc=0.696, loss=65.397, backward_time=0.761, grad_norm=104.000, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.394e-05, train_time=6.521
+[gpua003:0/64] 2023-07-06 07:07:12,660 (trainer:732) INFO: 16epoch:train:1701-1800batch: iter_time=9.317e-05, forward_time=0.107, loss_ctc=68.609, loss_att=46.164, acc=0.700, loss=52.897, backward_time=0.753, grad_norm=92.800, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.391e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 07:08:52,753 (trainer:732) INFO: 16epoch:train:1801-1900batch: iter_time=8.951e-05, forward_time=0.107, loss_ctc=67.542, loss_att=52.449, acc=0.694, loss=56.977, backward_time=0.752, grad_norm=85.811, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.387e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 07:10:32,527 (trainer:732) INFO: 16epoch:train:1901-2000batch: iter_time=8.930e-05, forward_time=0.107, loss_ctc=69.790, loss_att=55.626, acc=0.680, loss=59.875, backward_time=0.751, grad_norm=78.275, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.384e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:12:12,369 (trainer:732) INFO: 16epoch:train:2001-2100batch: iter_time=1.018e-04, forward_time=0.108, loss_ctc=69.942, loss_att=52.418, acc=0.688, loss=57.675, backward_time=0.752, grad_norm=83.432, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.381e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 07:13:52,056 (trainer:732) INFO: 16epoch:train:2101-2200batch: iter_time=1.072e-04, forward_time=0.108, loss_ctc=66.694, loss_att=50.173, acc=0.698, loss=55.129, backward_time=0.752, grad_norm=74.273, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.377e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 07:15:31,827 (trainer:732) INFO: 16epoch:train:2201-2300batch: iter_time=1.030e-04, forward_time=0.109, loss_ctc=86.699, loss_att=65.479, acc=0.678, loss=71.845, backward_time=0.753, grad_norm=112.678, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.374e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:17:11,502 (trainer:732) INFO: 16epoch:train:2301-2400batch: iter_time=1.009e-04, forward_time=0.109, loss_ctc=78.962, loss_att=61.486, acc=0.676, loss=66.729, backward_time=0.752, grad_norm=92.485, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.371e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 07:18:51,509 (trainer:732) INFO: 16epoch:train:2401-2500batch: iter_time=1.015e-04, forward_time=0.110, loss_ctc=75.890, loss_att=62.591, acc=0.690, loss=66.581, backward_time=0.754, grad_norm=93.157, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.367e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 07:18:52,779 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 07:19:12,128 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 07:19:15,913 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 07:19:15,913 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 07:19:15,919 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 07:24:48,132 (trainer:732) INFO: 16epoch:train:2501-2600batch: iter_time=1.279, forward_time=0.108, loss_ctc=71.726, loss_att=46.606, acc=0.697, loss=54.142, backward_time=0.767, grad_norm=105.412, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.364e-05, train_time=7.132
+[gpua003:0/64] 2023-07-06 07:26:27,825 (trainer:732) INFO: 16epoch:train:2601-2700batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=66.631, loss_att=51.594, acc=0.698, loss=56.105, backward_time=0.752, grad_norm=90.533, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.361e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 07:28:07,530 (trainer:732) INFO: 16epoch:train:2701-2800batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=70.966, loss_att=55.570, acc=0.680, loss=60.189, backward_time=0.752, grad_norm=86.466, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.358e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 07:29:47,312 (trainer:732) INFO: 16epoch:train:2801-2900batch: iter_time=1.072e-04, forward_time=0.108, loss_ctc=71.097, loss_att=53.450, acc=0.675, loss=58.744, backward_time=0.753, grad_norm=93.208, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.354e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:31:27,133 (trainer:732) INFO: 16epoch:train:2901-3000batch: iter_time=8.955e-05, forward_time=0.108, loss_ctc=63.821, loss_att=46.903, acc=0.701, loss=51.979, backward_time=0.752, grad_norm=74.418, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.351e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 07:33:06,795 (trainer:732) INFO: 16epoch:train:3001-3100batch: iter_time=9.687e-05, forward_time=0.108, loss_ctc=82.866, loss_att=60.932, acc=0.685, loss=67.512, backward_time=0.752, grad_norm=97.470, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.348e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 07:34:46,471 (trainer:732) INFO: 16epoch:train:3101-3200batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=82.047, loss_att=62.386, acc=0.681, loss=68.285, backward_time=0.752, grad_norm=94.406, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.344e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 07:36:26,118 (trainer:732) INFO: 16epoch:train:3201-3300batch: iter_time=9.042e-05, forward_time=0.108, loss_ctc=77.669, loss_att=65.484, acc=0.671, loss=69.140, backward_time=0.752, grad_norm=91.421, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.341e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 07:37:01,224 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 07:37:20,629 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 07:37:24,141 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 07:37:24,141 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 07:37:24,147 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 07:41:53,320 (trainer:732) INFO: 16epoch:train:3301-3400batch: iter_time=2.180, forward_time=0.108, loss_ctc=78.290, loss_att=55.680, acc=0.688, loss=62.463, backward_time=0.766, grad_norm=91.637, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.338e-05, train_time=6.544
+[gpua003:0/64] 2023-07-06 07:43:33,605 (trainer:732) INFO: 16epoch:train:3401-3500batch: iter_time=1.037e-04, forward_time=0.107, loss_ctc=68.344, loss_att=49.906, acc=0.712, loss=55.438, backward_time=0.754, grad_norm=81.948, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.335e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 07:45:13,347 (trainer:732) INFO: 16epoch:train:3501-3600batch: iter_time=9.421e-05, forward_time=0.108, loss_ctc=65.417, loss_att=50.435, acc=0.688, loss=54.930, backward_time=0.752, grad_norm=90.718, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.331e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:46:53,165 (trainer:732) INFO: 16epoch:train:3601-3700batch: iter_time=1.013e-04, forward_time=0.109, loss_ctc=70.603, loss_att=51.998, acc=0.686, loss=57.579, backward_time=0.751, grad_norm=85.036, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.328e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 07:48:33,178 (trainer:732) INFO: 16epoch:train:3701-3800batch: iter_time=1.098e-04, forward_time=0.109, loss_ctc=65.655, loss_att=51.080, acc=0.697, loss=55.453, backward_time=0.753, grad_norm=88.931, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.325e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 07:50:12,906 (trainer:732) INFO: 16epoch:train:3801-3900batch: iter_time=9.838e-05, forward_time=0.108, loss_ctc=74.297, loss_att=55.407, acc=0.684, loss=61.074, backward_time=0.751, grad_norm=82.975, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.322e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 07:51:56,731 (trainer:732) INFO: 16epoch:train:3901-4000batch: iter_time=9.921e-05, forward_time=0.109, loss_ctc=84.998, loss_att=61.759, acc=0.691, loss=68.731, backward_time=0.760, grad_norm=94.665, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.318e-05, train_time=2.076
+[gpua003:0/64] 2023-07-06 07:53:42,883 (trainer:732) INFO: 16epoch:train:4001-4100batch: iter_time=1.143e-04, forward_time=0.109, loss_ctc=75.067, loss_att=62.875, acc=0.670, loss=66.533, backward_time=0.758, grad_norm=91.761, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.315e-05, train_time=2.123
+[gpua003:0/64] 2023-07-06 07:54:50,953 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 07:55:10,233 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 07:55:13,721 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 07:55:13,721 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 07:55:13,728 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 07:58:55,908 (trainer:732) INFO: 16epoch:train:4101-4200batch: iter_time=2.076, forward_time=0.141, loss_ctc=74.642, loss_att=55.845, acc=0.693, loss=61.484, backward_time=0.761, grad_norm=97.003, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.114, optim0_lr0=9.312e-05, train_time=6.260
+[gpua003:0/64] 2023-07-06 08:00:37,112 (trainer:732) INFO: 16epoch:train:4201-4300batch: iter_time=1.039e-04, forward_time=0.107, loss_ctc=67.813, loss_att=44.642, acc=0.709, loss=51.593, backward_time=0.754, grad_norm=88.191, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.111, optim0_lr0=9.309e-05, train_time=2.024
+[gpua003:0/64] 2023-07-06 08:02:16,904 (trainer:732) INFO: 16epoch:train:4301-4400batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=68.215, loss_att=52.130, acc=0.696, loss=56.955, backward_time=0.752, grad_norm=94.395, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.306e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 08:03:56,527 (trainer:732) INFO: 16epoch:train:4401-4500batch: iter_time=9.344e-05, forward_time=0.106, loss_ctc=69.906, loss_att=54.891, acc=0.683, loss=59.395, backward_time=0.751, grad_norm=83.039, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.302e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 08:05:37,831 (trainer:732) INFO: 16epoch:train:4501-4600batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=68.748, loss_att=51.394, acc=0.689, loss=56.600, backward_time=0.755, grad_norm=93.992, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.299e-05, train_time=2.026
+[gpua003:0/64] 2023-07-06 08:07:17,508 (trainer:732) INFO: 16epoch:train:4601-4700batch: iter_time=1.059e-04, forward_time=0.107, loss_ctc=65.443, loss_att=48.597, acc=0.702, loss=53.651, backward_time=0.752, grad_norm=111.503, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.296e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 08:08:57,106 (trainer:732) INFO: 16epoch:train:4701-4800batch: iter_time=1.018e-04, forward_time=0.107, loss_ctc=84.020, loss_att=64.596, acc=0.679, loss=70.423, backward_time=0.751, grad_norm=127.671, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.293e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 08:10:36,976 (trainer:732) INFO: 16epoch:train:4801-4900batch: iter_time=1.060e-04, forward_time=0.108, loss_ctc=76.921, loss_att=58.835, acc=0.684, loss=64.261, backward_time=0.752, grad_norm=114.068, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.289e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 08:12:16,723 (trainer:732) INFO: 16epoch:train:4901-5000batch: iter_time=1.049e-04, forward_time=0.109, loss_ctc=74.951, loss_att=61.324, acc=0.694, loss=65.412, backward_time=0.751, grad_norm=92.245, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.286e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 08:12:18,610 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 08:12:37,768 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 08:12:41,328 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 08:12:41,328 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 08:12:41,335 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 08:18:49,163 (trainer:732) INFO: 16epoch:train:5001-5100batch: iter_time=1.303, forward_time=0.116, loss_ctc=72.397, loss_att=47.284, acc=0.706, loss=54.818, backward_time=0.768, grad_norm=87.352, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.283e-05, train_time=7.848
+[gpua003:0/64] 2023-07-06 08:20:28,927 (trainer:732) INFO: 16epoch:train:5101-5200batch: iter_time=1.050e-04, forward_time=0.109, loss_ctc=69.999, loss_att=51.235, acc=0.705, loss=56.864, backward_time=0.750, grad_norm=90.609, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.280e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 08:22:09,028 (trainer:732) INFO: 16epoch:train:5201-5300batch: iter_time=8.618e-05, forward_time=0.109, loss_ctc=62.913, loss_att=49.293, acc=0.704, loss=53.379, backward_time=0.753, grad_norm=77.097, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.277e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 08:23:48,933 (trainer:732) INFO: 16epoch:train:5301-5400batch: iter_time=8.628e-05, forward_time=0.109, loss_ctc=72.795, loss_att=55.439, acc=0.694, loss=60.646, backward_time=0.752, grad_norm=88.288,
clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.273e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 08:25:28,752 (trainer:732) INFO: 16epoch:train:5401-5500batch: iter_time=9.338e-05, forward_time=0.109, loss_ctc=64.718, loss_att=47.439, acc=0.702, loss=52.623, backward_time=0.752, grad_norm=82.629, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.270e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 08:27:08,812 (trainer:732) INFO: 16epoch:train:5501-5600batch: iter_time=8.449e-05, forward_time=0.108, loss_ctc=78.843, loss_att=57.892, acc=0.696, loss=64.177, backward_time=0.754, grad_norm=92.290, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.267e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 08:28:55,560 (trainer:732) INFO: 16epoch:train:5601-5700batch: iter_time=8.973e-05, forward_time=0.107, loss_ctc=83.966, loss_att=62.853, acc=0.700, loss=69.187, backward_time=0.778, grad_norm=96.335, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.264e-05, train_time=2.135 +[gpua003:0/64] 2023-07-06 08:30:43,294 (trainer:732) INFO: 16epoch:train:5701-5800batch: iter_time=8.613e-05, forward_time=0.108, loss_ctc=69.962, loss_att=57.568, acc=0.690, loss=61.286, backward_time=0.762, grad_norm=89.847, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.261e-05, train_time=2.154 +[gpua003:0/64] 2023-07-06 08:31:17,439 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 08:31:36,652 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:31:40,135 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:31:40,136 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 08:31:40,142 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:35:45,247 (trainer:732) INFO: 16epoch:train:5801-5900batch: iter_time=1.276, forward_time=0.107, loss_ctc=80.360, loss_att=57.972, acc=0.693, loss=64.689, backward_time=0.765, grad_norm=106.653, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.258e-05, train_time=6.039 +[gpua003:0/64] 2023-07-06 08:37:25,600 (trainer:732) INFO: 16epoch:train:5901-6000batch: iter_time=9.151e-05, forward_time=0.107, loss_ctc=69.337, loss_att=47.625, acc=0.723, loss=54.139, backward_time=0.753, grad_norm=82.017, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.254e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 08:39:05,439 (trainer:732) INFO: 16epoch:train:6001-6100batch: iter_time=9.170e-05, forward_time=0.107, loss_ctc=65.149, loss_att=50.375, acc=0.696, loss=54.807, backward_time=0.752, grad_norm=92.310, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.251e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:40:45,317 (trainer:732) INFO: 16epoch:train:6101-6200batch: iter_time=1.040e-04, 
forward_time=0.107, loss_ctc=70.697, loss_att=51.205, acc=0.700, loss=57.052, backward_time=0.753, grad_norm=86.258, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.248e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:42:24,960 (trainer:732) INFO: 16epoch:train:6201-6300batch: iter_time=9.827e-05, forward_time=0.108, loss_ctc=64.757, loss_att=50.584, acc=0.702, loss=54.836, backward_time=0.751, grad_norm=79.471, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.245e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:44:04,631 (trainer:732) INFO: 16epoch:train:6301-6400batch: iter_time=1.131e-04, forward_time=0.109, loss_ctc=72.490, loss_att=54.788, acc=0.696, loss=60.098, backward_time=0.751, grad_norm=92.689, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.242e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:45:44,133 (trainer:732) INFO: 16epoch:train:6401-6500batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=85.155, loss_att=58.768, acc=0.706, loss=66.684, backward_time=0.749, grad_norm=98.779, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.239e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 08:47:23,850 (trainer:732) INFO: 16epoch:train:6501-6600batch: iter_time=1.083e-04, forward_time=0.109, loss_ctc=73.711, loss_att=60.604, acc=0.688, loss=64.536, backward_time=0.751, grad_norm=84.467, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.235e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 08:48:30,857 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 08:48:50,019 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:48:53,511 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:48:53,512 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 08:48:53,518 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:53:20,677 (trainer:732) INFO: 16epoch:train:6601-6700batch: iter_time=1.289, forward_time=0.108, loss_ctc=74.070, loss_att=54.401, acc=0.697, loss=60.302, backward_time=0.761, grad_norm=86.767, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.232e-05, train_time=7.136 +[gpua003:0/64] 2023-07-06 08:55:01,310 (trainer:732) INFO: 16epoch:train:6701-6800batch: iter_time=1.138e-04, forward_time=0.107, loss_ctc=71.082, loss_att=51.771, acc=0.709, loss=57.564, backward_time=0.755, grad_norm=90.309, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.229e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 08:56:41,532 (trainer:732) INFO: 16epoch:train:6801-6900batch: iter_time=1.140e-04, forward_time=0.108, loss_ctc=65.828, loss_att=50.568, acc=0.693, loss=55.146, backward_time=0.752, grad_norm=92.849, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.226e-05, 
train_time=2.004 +[gpua003:0/64] 2023-07-06 08:58:21,807 (trainer:732) INFO: 16epoch:train:6901-7000batch: iter_time=1.105e-04, forward_time=0.107, loss_ctc=69.044, loss_att=51.957, acc=0.681, loss=57.083, backward_time=0.751, grad_norm=85.863, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.223e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 09:00:01,383 (trainer:732) INFO: 16epoch:train:7001-7100batch: iter_time=1.136e-04, forward_time=0.108, loss_ctc=66.173, loss_att=51.594, acc=0.692, loss=55.968, backward_time=0.751, grad_norm=107.161, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.220e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 09:01:41,306 (trainer:732) INFO: 16epoch:train:7101-7200batch: iter_time=1.196e-04, forward_time=0.108, loss_ctc=68.998, loss_att=50.099, acc=0.697, loss=55.769, backward_time=0.751, grad_norm=84.671, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.217e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:03:21,067 (trainer:732) INFO: 16epoch:train:7201-7300batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=83.622, loss_att=62.831, acc=0.690, loss=69.069, backward_time=0.751, grad_norm=94.437, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.213e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:05:00,740 (trainer:732) INFO: 16epoch:train:7301-7400batch: iter_time=1.076e-04, forward_time=0.108, loss_ctc=76.063, loss_att=61.450, acc=0.677, loss=65.834, backward_time=0.751, grad_norm=89.194, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.210e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 09:06:40,479 (trainer:732) INFO: 16epoch:train:7401-7500batch: iter_time=1.026e-04, forward_time=0.108, loss_ctc=74.348, loss_att=63.331, acc=0.691, loss=66.636, backward_time=0.752, grad_norm=94.746, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.207e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:06:50,888 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-06 09:07:09,872 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 09:07:13,343 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 09:07:13,343 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-06 09:07:13,350 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 09:12:31,328 (trainer:732) INFO: 16epoch:train:7501-7600batch: iter_time=1.366, forward_time=0.109, loss_ctc=70.718, loss_att=46.675, acc=0.709, loss=53.888, backward_time=0.765, grad_norm=84.044, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.204e-05, train_time=7.017
+[gpua003:0/64] 2023-07-06 09:14:11,524 (trainer:732) INFO: 16epoch:train:7601-7700batch: iter_time=1.096e-04, forward_time=0.109, loss_ctc=71.556, loss_att=49.715, acc=0.711, loss=56.267, backward_time=0.752, grad_norm=84.595, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.201e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 09:15:55,971 (trainer:732) INFO: 16epoch:train:7701-7800batch: iter_time=1.048e-04, forward_time=0.110, loss_ctc=63.324, loss_att=50.467, acc=0.700, loss=54.324, backward_time=0.762, grad_norm=79.834, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.198e-05, train_time=2.089
+[gpua003:0/64] 2023-07-06 09:17:38,069 (trainer:732) INFO: 16epoch:train:7801-7900batch: iter_time=1.107e-04, forward_time=0.109, loss_ctc=74.072, loss_att=55.493, acc=0.692, loss=61.067, backward_time=0.764, grad_norm=89.346, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.195e-05, train_time=2.042
+[gpua003:0/64] 2023-07-06 09:19:24,407 (trainer:732) INFO: 16epoch:train:7901-8000batch: iter_time=1.124e-04, forward_time=0.109, loss_ctc=63.418, loss_att=47.702, acc=0.707, loss=52.417, backward_time=0.766, grad_norm=74.133, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.192e-05, train_time=2.127
+[gpua003:0/64] 2023-07-06 09:21:04,320 (trainer:732) INFO: 16epoch:train:8001-8100batch: iter_time=1.101e-04, forward_time=0.109, loss_ctc=80.173, loss_att=56.871, acc=0.698, loss=63.862, backward_time=0.751, grad_norm=90.058, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.189e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 09:22:51,435 (trainer:732) INFO: 16epoch:train:8101-8200batch: iter_time=1.128e-04, forward_time=0.109, loss_ctc=82.878, loss_att=61.648, acc=0.703, loss=68.017, backward_time=0.757, grad_norm=96.301, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.185e-05, train_time=2.142
+[gpua003:0/64] 2023-07-06 09:24:32,016 (trainer:732) INFO: 16epoch:train:8201-8300batch: iter_time=1.174e-04, forward_time=0.109, loss_ctc=70.061, loss_att=57.659, acc=0.693, loss=61.380, backward_time=0.750, grad_norm=90.693, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.182e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 09:25:08,632 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-06 09:25:27,808 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 09:25:31,292 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 09:25:31,292 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 09:25:31,298 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 09:30:43,049 (trainer:732) INFO: 16epoch:train:8301-8400batch: iter_time=1.323, forward_time=0.109, loss_ctc=78.228, loss_att=55.459, acc=0.696, loss=62.290, backward_time=0.766, grad_norm=102.805, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.179e-05, train_time=7.420
+[gpua003:0/64] 2023-07-06 09:32:24,140 (trainer:732) INFO: 16epoch:train:8401-8500batch: iter_time=1.051e-04, forward_time=0.107, loss_ctc=65.900, loss_att=49.152, acc=0.721, loss=54.176, backward_time=0.753, grad_norm=86.699, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.176e-05, train_time=2.022
+[gpua003:0/64] 2023-07-06 09:34:03,912 (trainer:732) INFO: 16epoch:train:8501-8600batch: iter_time=9.517e-05, forward_time=0.107, loss_ctc=64.248, loss_att=49.488, acc=0.696, loss=53.916, backward_time=0.752, grad_norm=92.458, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.173e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 09:35:44,278 (trainer:732) INFO: 16epoch:train:8601-8700batch: iter_time=1.134e-04, forward_time=0.109, loss_ctc=68.813, loss_att=50.173, acc=0.693, loss=55.765, backward_time=0.753, grad_norm=79.140, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.170e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 09:37:24,339 (trainer:732) INFO: 16epoch:train:8701-8800batch: iter_time=1.014e-04, forward_time=0.107, loss_ctc=63.927, loss_att=49.180, acc=0.703, loss=53.604, backward_time=0.752, grad_norm=74.079, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.167e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 09:39:04,178 (trainer:732) INFO: 16epoch:train:8801-8900batch: iter_time=9.886e-05, forward_time=0.107, loss_ctc=71.856, loss_att=54.471, acc=0.690, loss=59.686, backward_time=0.751, grad_norm=84.227, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.164e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 09:40:44,106 (trainer:732) INFO: 16epoch:train:8901-9000batch: iter_time=9.990e-05, forward_time=0.108, loss_ctc=83.183, loss_att=60.219, acc=0.697, loss=67.108, backward_time=0.752, grad_norm=105.843, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.161e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 09:42:23,729 (trainer:732) INFO: 16epoch:train:9001-9100batch: iter_time=1.063e-04, forward_time=0.107, loss_ctc=73.879, loss_att=62.193, acc=0.675, loss=65.699, backward_time=0.750, grad_norm=87.852, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.158e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 09:43:30,780 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-06 09:43:50,013 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 09:43:53,960 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 09:43:53,960 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-06 09:43:53,967 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 09:47:02,373 (trainer:732) INFO: 16epoch:train:9101-9200batch: iter_time=1.335, forward_time=0.108, loss_ctc=73.193, loss_att=55.038, acc=0.695, loss=60.485, backward_time=0.764, grad_norm=100.991, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.155e-05, train_time=5.573
+[gpua003:0/64] 2023-07-06 09:48:42,805 (trainer:732) INFO: 16epoch:train:9201-9300batch: iter_time=1.082e-04, forward_time=0.108, loss_ctc=70.916, loss_att=49.501, acc=0.717, loss=55.926, backward_time=0.755, grad_norm=111.947, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.152e-05, train_time=2.008
+[gpua003:0/64] 2023-07-06 09:50:23,695 (trainer:732) INFO: 16epoch:train:9301-9400batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=65.130, loss_att=48.335, acc=0.700, loss=53.374, backward_time=0.753, grad_norm=76.035, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.148e-05, train_time=2.018
+[gpua003:0/64] 2023-07-06 09:52:03,613 (trainer:732) INFO: 16epoch:train:9401-9500batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=69.151, loss_att=50.398, acc=0.699, loss=56.024, backward_time=0.752, grad_norm=82.318, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.111, optim0_lr0=9.145e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 09:53:43,866 (trainer:732) INFO: 16epoch:train:9501-9600batch: iter_time=9.826e-05, forward_time=0.108, loss_ctc=64.096, loss_att=50.574, acc=0.700, loss=54.631, backward_time=0.752, grad_norm=76.082, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.142e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 09:55:23,809 (trainer:732) INFO: 16epoch:train:9601-9700batch: iter_time=1.055e-04, forward_time=0.107, loss_ctc=68.502, loss_att=50.633, acc=0.705, loss=55.994, backward_time=0.752, grad_norm=90.453, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.139e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 09:57:03,795 (trainer:732) INFO: 16epoch:train:9701-9800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=83.554, loss_att=60.263, acc=0.706, loss=67.250, backward_time=0.752, grad_norm=121.058, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.136e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 09:58:43,540 (trainer:732) INFO: 16epoch:train:9801-9900batch: iter_time=1.058e-04, forward_time=0.107, loss_ctc=75.767, loss_att=61.630, acc=0.686, loss=65.871, backward_time=0.752, grad_norm=99.766, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.133e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 10:00:23,449 (trainer:732) INFO: 16epoch:train:9901-10000batch: iter_time=1.125e-04, forward_time=0.109, loss_ctc=73.492, loss_att=62.177, acc=0.702, loss=65.572, backward_time=0.752, grad_norm=87.104, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.130e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 10:13:27,455 (trainer:338) INFO: 16epoch results: [train] iter_time=0.188, forward_time=0.109, loss_ctc=72.648, loss_att=54.689, acc=0.694, loss=60.077, backward_time=0.755, grad_norm=91.310, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.286e-05, train_time=2.587, time=3 hours, 35 minutes and 51.24 seconds, total_count=130000, gpu_max_cached_mem_GB=37.775, [valid] loss_ctc=53.067, cer_ctc=0.290, loss_att=44.062, acc=0.664, cer=0.358, wer=0.991, loss=46.763, time=6 minutes and 19.52 seconds, total_count=13662, gpu_max_cached_mem_GB=37.775, [att_plot] time=6 minutes and 30.6 seconds, total_count=0, gpu_max_cached_mem_GB=37.775
+[gpua003:0/64] 2023-07-06 10:13:43,422 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua003:0/64] 2023-07-06 10:13:43,465 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/11epoch.pth
+[gpua003:0/64] 2023-07-06 10:13:43,465 (trainer:272) INFO: 17/100epoch started. Estimated time to finish: 1 week, 6 days and 10 hours
+[gpua003:0/64] 2023-07-06 10:13:43,501 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 10:14:02,307 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 10:14:05,803 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 10:14:05,804 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 10:14:05,819 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 10:20:06,308 (trainer:732) INFO: 17epoch:train:1-100batch: iter_time=2.583, forward_time=0.213, loss_ctc=70.673, loss_att=58.961, acc=0.684, loss=62.475, backward_time=0.820, grad_norm=95.879, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.123, optim0_lr0=9.127e-05, train_time=7.656
+[gpua003:0/64] 2023-07-06 10:21:47,983 (trainer:732) INFO: 17epoch:train:101-200batch: iter_time=1.078e-04, forward_time=0.110, loss_ctc=72.857, loss_att=65.701, acc=0.683, loss=67.848, backward_time=0.753, grad_norm=87.783, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.124e-05, train_time=2.034
+[gpua003:0/64] 2023-07-06 10:23:43,718 (trainer:732) INFO: 17epoch:train:201-300batch: iter_time=3.269e-04, forward_time=0.202, loss_ctc=74.298, loss_att=62.216, acc=0.684, loss=65.841, backward_time=0.776, grad_norm=85.905, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.119, optim0_lr0=9.121e-05, train_time=2.314
+[gpua003:0/64] 2023-07-06 10:25:26,949 (trainer:732) INFO: 17epoch:train:301-400batch: iter_time=9.790e-05, forward_time=0.108, loss_ctc=72.162, loss_att=54.531, acc=0.685, loss=59.820, backward_time=0.756, grad_norm=86.861, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.118e-05, train_time=2.064
+[gpua003:0/64] 2023-07-06 10:27:13,540 (trainer:732) INFO: 17epoch:train:401-500batch: iter_time=1.134e-04, forward_time=0.120, loss_ctc=74.989, loss_att=52.743, acc=0.701, loss=59.416, backward_time=0.763, grad_norm=88.929, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.117, optim0_lr0=9.115e-05, train_time=2.132
+[gpua003:0/64] 2023-07-06 10:29:07,314 (trainer:732) INFO: 17epoch:train:501-600batch: iter_time=0.006, forward_time=0.165, loss_ctc=74.139, loss_att=57.626, acc=0.683, loss=62.580, backward_time=0.774, grad_norm=89.735, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.116, optim0_lr0=9.112e-05, train_time=2.275
+[gpua003:0/64] 2023-07-06 10:31:07,776 (trainer:732) INFO: 17epoch:train:601-700batch: iter_time=9.955e-05, forward_time=0.110, loss_ctc=75.682, loss_att=59.754, acc=0.684, loss=64.533, backward_time=0.796, grad_norm=95.065, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.109e-05, train_time=2.409
+[gpua003:0/64] 2023-07-06 10:33:08,425 (trainer:732) INFO: 17epoch:train:701-800batch: iter_time=0.001, forward_time=0.197, loss_ctc=73.468, loss_att=56.545, acc=0.693, loss=61.622, backward_time=0.799, grad_norm=89.781, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.117, optim0_lr0=9.106e-05, train_time=2.411
+[gpua003:0/64] 2023-07-06 10:33:52,355 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 10:34:11,454 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 10:34:14,921 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 10:34:14,921 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 10:34:14,928 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 10:38:42,065 (trainer:732) INFO: 17epoch:train:801-900batch: iter_time=1.652, forward_time=0.108, loss_ctc=78.264, loss_att=59.917, acc=0.689, loss=65.421, backward_time=0.771, grad_norm=105.575, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.103e-05, train_time=6.674
+[gpua003:0/64] 2023-07-06 10:40:21,942 (trainer:732) INFO: 17epoch:train:901-1000batch: iter_time=9.108e-05, forward_time=0.107, loss_ctc=68.668, loss_att=64.753, acc=0.669, loss=65.927, backward_time=0.750, grad_norm=89.386, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.100e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 10:42:01,601 (trainer:732) INFO: 17epoch:train:1001-1100batch: iter_time=9.405e-05, forward_time=0.107, loss_ctc=73.564, loss_att=60.516, acc=0.685, loss=64.430, backward_time=0.750, grad_norm=93.150, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.097e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 10:43:41,772 (trainer:732) INFO: 17epoch:train:1101-1200batch: iter_time=9.308e-05, forward_time=0.107, loss_ctc=70.639, loss_att=56.045, acc=0.686, loss=60.423, backward_time=0.751, grad_norm=87.677, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.094e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 10:45:21,649 (trainer:732) INFO: 17epoch:train:1201-1300batch: iter_time=9.487e-05, forward_time=0.107, loss_ctc=71.051, loss_att=55.501, acc=0.686, loss=60.166, backward_time=0.752, grad_norm=91.712, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.091e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 10:47:01,483 (trainer:732) INFO: 17epoch:train:1301-1400batch: iter_time=9.825e-05, forward_time=0.108, loss_ctc=78.942, loss_att=56.523, acc=0.684, loss=63.249, backward_time=0.752, grad_norm=90.368, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.088e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 10:48:41,812 (trainer:732) INFO: 17epoch:train:1401-1500batch: iter_time=1.015e-04, forward_time=0.114, loss_ctc=76.055, loss_att=56.659, acc=0.684, loss=62.478, backward_time=0.752, grad_norm=88.888, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.085e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 10:50:21,503 (trainer:732) INFO: 17epoch:train:1501-1600batch: iter_time=1.011e-04, forward_time=0.106, loss_ctc=72.092, loss_att=59.329, acc=0.681, loss=63.157, backward_time=0.751, grad_norm=83.857, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.082e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 10:51:28,539 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 10:51:47,761 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 10:51:51,243 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 10:51:51,244 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-06 10:51:51,250 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 10:55:23,750 (trainer:732) INFO: 17epoch:train:1601-1700batch: iter_time=1.339, forward_time=0.136, loss_ctc=75.727, loss_att=56.568, acc=0.694, loss=62.316, backward_time=0.762, grad_norm=98.984, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.114, optim0_lr0=9.079e-05, train_time=6.043
+[gpua003:0/64] 2023-07-06 10:57:04,140 (trainer:732) INFO: 17epoch:train:1701-1800batch: iter_time=1.070e-04, forward_time=0.110, loss_ctc=65.293, loss_att=58.713, acc=0.683, loss=60.687, backward_time=0.755, grad_norm=89.446, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.076e-05, train_time=2.009
+[gpua003:0/64] 2023-07-06 10:58:44,084 (trainer:732) INFO: 17epoch:train:1801-1900batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=74.486, loss_att=67.076, acc=0.685, loss=69.299, backward_time=0.753, grad_norm=99.307, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.073e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 11:00:24,124 (trainer:732) INFO: 17epoch:train:1901-2000batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=74.341, loss_att=60.362, acc=0.692, loss=64.556, backward_time=0.753, grad_norm=87.026, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.070e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 11:02:04,044 (trainer:732) INFO: 17epoch:train:2001-2100batch: iter_time=1.014e-04, forward_time=0.108, loss_ctc=66.178, loss_att=49.767, acc=0.697, loss=54.690, backward_time=0.753, grad_norm=86.555, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.067e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 11:03:43,843 (trainer:732) INFO: 17epoch:train:2101-2200batch: iter_time=9.901e-05, forward_time=0.109, loss_ctc=77.231, loss_att=58.176, acc=0.697, loss=63.892, backward_time=0.752, grad_norm=93.954, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.064e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:05:23,555 (trainer:732) INFO: 17epoch:train:2201-2300batch: iter_time=9.750e-05, forward_time=0.108, loss_ctc=75.802, loss_att=57.441, acc=0.693, loss=62.949, backward_time=0.751, grad_norm=89.231, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.061e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 11:07:03,243 (trainer:732) INFO: 17epoch:train:2301-2400batch: iter_time=1.062e-04, forward_time=0.108, loss_ctc=70.913, loss_att=52.438, acc=0.689, loss=57.980, backward_time=0.751, grad_norm=110.095, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.058e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 11:09:01,745 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 11:09:21,136 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 11:09:24,685 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 11:09:24,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 11:09:24,692 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 11:12:11,094 (trainer:732) INFO: 17epoch:train:2401-2500batch: iter_time=1.565, forward_time=0.109, loss_ctc=75.267, loss_att=58.496, acc=0.696, loss=63.527, backward_time=0.755, grad_norm=104.336, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.055e-05, train_time=6.157
+[gpua003:0/64] 2023-07-06 11:13:53,368 (trainer:732) INFO: 17epoch:train:2501-2600batch: iter_time=1.102e-04, forward_time=0.108, loss_ctc=70.363, loss_att=57.552, acc=0.686, loss=61.395, backward_time=0.759, grad_norm=92.372, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.052e-05, train_time=2.045
+[gpua003:0/64] 2023-07-06 11:15:33,128 (trainer:732) INFO: 17epoch:train:2601-2700batch: iter_time=1.054e-04, forward_time=0.108, loss_ctc=71.407, loss_att=65.980, acc=0.682, loss=67.608, backward_time=0.750, grad_norm=88.759, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.049e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 11:17:13,072 (trainer:732) INFO: 17epoch:train:2701-2800batch: iter_time=1.124e-04, forward_time=0.108, loss_ctc=73.031, loss_att=60.033, acc=0.686, loss=63.933, backward_time=0.751, grad_norm=87.774, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.046e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 11:18:52,468 (trainer:732) INFO: 17epoch:train:2801-2900batch: iter_time=1.132e-04, forward_time=0.106, loss_ctc=70.091, loss_att=54.398, acc=0.683, loss=59.106, backward_time=0.749, grad_norm=91.603, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.043e-05, train_time=1.988
+[gpua003:0/64] 2023-07-06 11:20:32,129 (trainer:732) INFO: 17epoch:train:2901-3000batch: iter_time=1.165e-04, forward_time=0.108, loss_ctc=74.247, loss_att=52.240, acc=0.696, loss=58.842, backward_time=0.750, grad_norm=94.573, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.040e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 11:22:11,683 (trainer:732) INFO: 17epoch:train:3001-3100batch: iter_time=1.201e-04, forward_time=0.107, loss_ctc=73.832, loss_att=58.188, acc=0.682, loss=62.881, backward_time=0.749, grad_norm=91.783, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.037e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 11:23:51,407 (trainer:732) INFO: 17epoch:train:3101-3200batch: iter_time=1.178e-04, forward_time=0.108, loss_ctc=72.306, loss_att=57.425, acc=0.683, loss=61.889, backward_time=0.751, grad_norm=96.037, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.034e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 11:25:30,906 (trainer:732) INFO: 17epoch:train:3201-3300batch: iter_time=1.122e-04, forward_time=0.107, loss_ctc=73.002, loss_att=56.253, acc=0.694, loss=61.278, backward_time=0.750, grad_norm=94.698, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.031e-05, train_time=1.990
+[gpua003:0/64] 2023-07-06 11:26:04,198 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 11:26:23,282 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 11:26:26,814 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 11:26:26,814 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-06 11:26:26,821 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 11:32:52,064 (trainer:732) INFO: 17epoch:train:3301-3400batch: iter_time=1.297, forward_time=0.107, loss_ctc=74.660, loss_att=57.798, acc=0.692, loss=62.857, backward_time=0.766, grad_norm=124.515, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.028e-05, train_time=8.823
+[gpua003:0/64] 2023-07-06 11:34:32,228 (trainer:732) INFO: 17epoch:train:3401-3500batch: iter_time=1.056e-04, forward_time=0.108, loss_ctc=67.842, loss_att=63.769, acc=0.687, loss=64.991, backward_time=0.753, grad_norm=85.874, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.025e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 11:36:12,063 (trainer:732) INFO: 17epoch:train:3501-3600batch: iter_time=9.523e-05, forward_time=0.108, loss_ctc=71.756, loss_att=59.431, acc=0.693, loss=63.129, backward_time=0.752, grad_norm=81.853, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.022e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:37:52,081 (trainer:732) INFO: 17epoch:train:3601-3700batch: iter_time=8.824e-05, forward_time=0.108, loss_ctc=71.855, loss_att=55.535, acc=0.697, loss=60.431, backward_time=0.752, grad_norm=82.300, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.020e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 11:39:37,163 (trainer:732) INFO: 17epoch:train:3701-3800batch: iter_time=9.169e-05, forward_time=0.108, loss_ctc=69.667, loss_att=54.813, acc=0.695, loss=59.269, backward_time=0.759, grad_norm=90.155, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.017e-05, train_time=2.101
+[gpua003:0/64] 2023-07-06 11:41:16,991 (trainer:732) INFO: 17epoch:train:3801-3900batch: iter_time=9.824e-05, forward_time=0.107, loss_ctc=77.697, loss_att=55.817, acc=0.700, loss=62.381, backward_time=0.751, grad_norm=89.894, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.014e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:42:56,825 (trainer:732) INFO: 17epoch:train:3901-4000batch: iter_time=9.875e-05, forward_time=0.107, loss_ctc=73.227, loss_att=54.909, acc=0.698, loss=60.404, backward_time=0.751, grad_norm=98.795, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.011e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:44:38,362 (trainer:732) INFO: 17epoch:train:4001-4100batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=72.543, loss_att=58.175, acc=0.689, loss=62.485, backward_time=0.755, grad_norm=89.796, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.008e-05, train_time=2.031
+[gpua003:0/64] 2023-07-06 11:45:44,663 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 11:46:03,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 11:46:07,363 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 11:46:07,363 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 11:46:07,369 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 11:51:11,190 (trainer:732) INFO: 17epoch:train:4101-4200batch: iter_time=1.302, forward_time=0.107, loss_ctc=75.093, loss_att=55.212, acc=0.696, loss=61.176, backward_time=0.773, grad_norm=88.273, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.005e-05, train_time=7.856
+[gpua003:0/64] 2023-07-06 11:52:51,935 (trainer:732) INFO: 17epoch:train:4201-4300batch: iter_time=9.852e-05, forward_time=0.108, loss_ctc=66.394, loss_att=59.086, acc=0.681, loss=61.279, backward_time=0.754, grad_norm=84.241, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.002e-05, train_time=2.015
+[gpua003:0/64] 2023-07-06 11:54:31,640 (trainer:732) INFO: 17epoch:train:4301-4400batch: iter_time=9.413e-05, forward_time=0.107, loss_ctc=71.953, loss_att=65.497, acc=0.672, loss=67.434, backward_time=0.751, grad_norm=102.244, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.999e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 11:56:11,417 (trainer:732) INFO: 17epoch:train:4401-4500batch: iter_time=9.684e-05, forward_time=0.107, loss_ctc=72.922, loss_att=58.511, acc=0.684, loss=62.834, backward_time=0.751, grad_norm=97.141, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.996e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 11:57:51,231 (trainer:732) INFO: 17epoch:train:4501-4600batch: iter_time=1.040e-04, forward_time=0.107, loss_ctc=70.213, loss_att=52.171, acc=0.696, loss=57.583, backward_time=0.751, grad_norm=81.295, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.993e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:59:30,948 (trainer:732) INFO: 17epoch:train:4601-4700batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=80.215, loss_att=60.929, acc=0.678, loss=66.715, backward_time=0.751, grad_norm=101.501, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.990e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 12:01:10,656 (trainer:732) INFO: 17epoch:train:4701-4800batch: iter_time=1.032e-04, forward_time=0.107, loss_ctc=72.860, loss_att=54.463, acc=0.685, loss=59.982, backward_time=0.750, grad_norm=86.781, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.987e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 12:02:50,263 (trainer:732) INFO: 17epoch:train:4801-4900batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=72.023, loss_att=55.370, acc=0.691, loss=60.366, backward_time=0.751, grad_norm=80.330, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.985e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 12:04:30,172 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 12:04:49,660 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 12:04:53,209 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 12:04:53,209 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 12:04:53,232 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 12:09:03,264 (trainer:732) INFO: 17epoch:train:4901-5000batch: iter_time=2.218, forward_time=0.107, loss_ctc=69.690, loss_att=56.365, acc=0.679, loss=60.363, backward_time=0.760, grad_norm=86.852, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.982e-05, train_time=7.460
+[gpua003:0/64] 2023-07-06 12:10:45,243 (trainer:732) INFO: 17epoch:train:5001-5100batch: iter_time=9.156e-05, forward_time=0.108, loss_ctc=69.222, loss_att=56.473, acc=0.696, loss=60.298, backward_time=0.760, grad_norm=92.071, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.979e-05, train_time=2.039
+[gpua003:0/64] 2023-07-06 12:12:25,764 (trainer:732) INFO: 17epoch:train:5101-5200batch: iter_time=1.022e-04, forward_time=0.108, loss_ctc=70.202, loss_att=64.051, acc=0.686, loss=65.896, backward_time=0.754, grad_norm=83.538, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.976e-05, train_time=2.010
+[gpua003:0/64] 2023-07-06 12:14:28,497 (trainer:732) INFO: 17epoch:train:5201-5300batch: iter_time=1.115e-04, forward_time=0.108, loss_ctc=72.332, loss_att=60.244, acc=0.687, loss=63.870, backward_time=0.813, grad_norm=91.528, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.973e-05, train_time=2.454
+[gpua003:0/64] 2023-07-06 12:16:08,436 (trainer:732) INFO: 17epoch:train:5301-5400batch: iter_time=8.814e-05, forward_time=0.109, loss_ctc=69.588, loss_att=52.565, acc=0.687, loss=57.672, backward_time=0.753, grad_norm=77.684, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.970e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 12:17:52,763 (trainer:732) INFO: 17epoch:train:5401-5500batch: iter_time=8.935e-05, forward_time=0.109, loss_ctc=73.912, loss_att=52.422, acc=0.696, loss=58.869, backward_time=0.776, grad_norm=84.516, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.967e-05, train_time=2.086
+[gpua003:0/64] 2023-07-06 12:19:32,754 (trainer:732) INFO: 17epoch:train:5501-5600batch: iter_time=8.392e-05, forward_time=0.108, loss_ctc=73.547, loss_att=57.855, acc=0.682, loss=62.562, backward_time=0.753, grad_norm=96.965, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.964e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 12:21:12,375 (trainer:732) INFO: 17epoch:train:5601-5700batch: iter_time=1.111e-04, forward_time=0.108, loss_ctc=71.195, loss_att=56.412, acc=0.687, loss=60.847, backward_time=0.751, grad_norm=116.013, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.961e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 12:22:52,067 (trainer:732) INFO: 17epoch:train:5701-5800batch: iter_time=8.850e-05, forward_time=0.108, loss_ctc=71.813, loss_att=55.666, acc=0.695, loss=60.510, backward_time=0.751, grad_norm=115.475, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.959e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 12:23:25,320 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-06 12:23:44,500 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 12:23:48,062 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 12:23:48,062 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 12:23:48,068 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 12:29:41,148 (trainer:732) INFO: 17epoch:train:5801-5900batch: iter_time=1.954, forward_time=0.110, loss_ctc=75.608, loss_att=57.604, acc=0.698, loss=63.005, backward_time=0.765, grad_norm=95.989, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.956e-05, train_time=8.181
+[gpua003:0/64] 2023-07-06 12:31:32,248 (trainer:732) INFO: 17epoch:train:5901-6000batch: iter_time=1.021e-04, forward_time=0.110, loss_ctc=65.942, loss_att=62.877, acc=0.691, loss=63.796, backward_time=0.780, grad_norm=90.009, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.953e-05, train_time=2.222
+[gpua003:0/64] 2023-07-06 12:33:12,332 (trainer:732) INFO: 17epoch:train:6001-6100batch: iter_time=1.069e-04, forward_time=0.110, loss_ctc=71.278, loss_att=58.934, acc=0.693, loss=62.637, backward_time=0.753, grad_norm=83.756, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.950e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 12:34:52,381 (trainer:732) INFO: 17epoch:train:6101-6200batch: iter_time=1.058e-04, forward_time=0.110, loss_ctc=70.544, loss_att=53.727, acc=0.704, loss=58.772, backward_time=0.753, grad_norm=79.862, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.947e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 12:36:34,193 (trainer:732) INFO: 17epoch:train:6201-6300batch: iter_time=1.036e-04, forward_time=0.110, loss_ctc=69.284, loss_att=54.871, acc=0.698, loss=59.195, backward_time=0.759, grad_norm=80.439, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.944e-05, train_time=2.036
+[gpua003:0/64] 2023-07-06 12:38:16,533 (trainer:732) INFO: 17epoch:train:6301-6400batch: iter_time=1.054e-04, forward_time=0.110, loss_ctc=76.122, loss_att=54.596, acc=0.703, loss=61.054, backward_time=0.756, grad_norm=92.990, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.941e-05, train_time=2.047
+[gpua003:0/64] 2023-07-06 12:40:03,767 (trainer:732) INFO: 17epoch:train:6401-6500batch: iter_time=1.067e-04, forward_time=0.109, loss_ctc=72.416, loss_att=54.767, acc=0.698, loss=60.062, backward_time=0.760, grad_norm=82.798, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.938e-05, train_time=2.144
+[gpua003:0/64] 2023-07-06 12:41:43,845 (trainer:732) INFO: 17epoch:train:6501-6600batch: iter_time=1.023e-04, forward_time=0.109, loss_ctc=69.309, loss_att=56.242, acc=0.693, loss=60.162, backward_time=0.752, grad_norm=84.956, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.936e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 12:42:53,074 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-06 12:43:12,502 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 12:43:16,051 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 12:43:16,051 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-06 12:43:16,058 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 12:48:47,376 (trainer:732) INFO: 17epoch:train:6601-6700batch: iter_time=1.326, forward_time=0.109, loss_ctc=73.679, loss_att=54.146, acc=0.702, loss=60.006, backward_time=0.774, grad_norm=87.071, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.933e-05, train_time=8.470
+[gpua003:0/64] 2023-07-06 12:50:28,471 (trainer:732) INFO: 17epoch:train:6701-6800batch: iter_time=9.504e-05, forward_time=0.109, loss_ctc=66.009, loss_att=56.529, acc=0.690, loss=59.373, backward_time=0.754, grad_norm=86.611, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.930e-05, train_time=2.022
+[gpua003:0/64] 2023-07-06 12:52:08,406 (trainer:732) INFO: 17epoch:train:6801-6900batch: iter_time=9.435e-05, forward_time=0.109, loss_ctc=71.563, loss_att=64.197, acc=0.675, loss=66.407, backward_time=0.752, grad_norm=91.386, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.927e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 12:53:48,230 (trainer:732) INFO: 17epoch:train:6901-7000batch: iter_time=1.083e-04, forward_time=0.108, loss_ctc=70.739, loss_att=56.404, acc=0.690, loss=60.704, backward_time=0.752, grad_norm=77.527, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.924e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 12:55:28,052 (trainer:732) INFO: 17epoch:train:7001-7100batch: iter_time=1.054e-04, forward_time=0.109, loss_ctc=67.590, loss_att=51.543, acc=0.698, loss=56.357, backward_time=0.752, grad_norm=86.798, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.921e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 12:57:07,643 (trainer:732) INFO: 17epoch:train:7101-7200batch: iter_time=1.132e-04, forward_time=0.108, loss_ctc=78.963, loss_att=61.659, acc=0.674, loss=66.850, backward_time=0.751, grad_norm=88.590, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.919e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 12:58:47,336 (trainer:732) INFO: 17epoch:train:7201-7300batch: iter_time=1.007e-04, forward_time=0.109, loss_ctc=71.829, loss_att=53.054, acc=0.688, loss=58.686, backward_time=0.752, grad_norm=99.178, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.916e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 13:00:27,154 (trainer:732) INFO: 17epoch:train:7301-7400batch: iter_time=9.624e-05, forward_time=0.110, loss_ctc=70.807, loss_att=54.983, acc=0.694, loss=59.730, backward_time=0.752, grad_norm=96.232, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.913e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 13:02:07,410 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua003:0/64] 2023-07-06 13:02:26,581 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 13:02:30,447 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 13:02:30,447 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 13:02:30,453 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 13:06:49,822 (trainer:732) INFO: 17epoch:train:7401-7500batch: iter_time=1.317, forward_time=0.107, loss_ctc=69.012, loss_att=56.036, acc=0.683, loss=59.929, backward_time=0.766, grad_norm=92.081, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.910e-05, train_time=7.653
+[gpua003:0/64] 2023-07-06 13:08:33,082 (trainer:732) INFO: 17epoch:train:7501-7600batch: iter_time=1.091e-04, forward_time=0.109, loss_ctc=70.374, loss_att=56.800, acc=0.695, loss=60.872, backward_time=0.760, grad_norm=93.389, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.907e-05, train_time=2.065
+[gpua003:0/64] 2023-07-06 13:10:13,369 (trainer:732) INFO: 17epoch:train:7601-7700batch: iter_time=1.035e-04, forward_time=0.109, loss_ctc=71.632, loss_att=65.224, acc=0.684, loss=67.146,
backward_time=0.753, grad_norm=93.615, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.904e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 13:11:53,366 (trainer:732) INFO: 17epoch:train:7701-7800batch: iter_time=1.042e-04, forward_time=0.108, loss_ctc=71.948, loss_att=59.571, acc=0.687, loss=63.284, backward_time=0.753, grad_norm=89.530, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.902e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 13:13:33,022 (trainer:732) INFO: 17epoch:train:7801-7900batch: iter_time=1.008e-04, forward_time=0.106, loss_ctc=71.162, loss_att=53.846, acc=0.687, loss=59.041, backward_time=0.751, grad_norm=93.637, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.899e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:15:12,530 (trainer:732) INFO: 17epoch:train:7901-8000batch: iter_time=1.035e-04, forward_time=0.106, loss_ctc=73.768, loss_att=51.961, acc=0.697, loss=58.503, backward_time=0.750, grad_norm=93.453, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.896e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 13:16:52,321 (trainer:732) INFO: 17epoch:train:8001-8100batch: iter_time=1.051e-04, forward_time=0.107, loss_ctc=72.484, loss_att=57.703, acc=0.684, loss=62.137, backward_time=0.751, grad_norm=92.203, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.893e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 13:18:31,917 (trainer:732) INFO: 17epoch:train:8101-8200batch: iter_time=1.031e-04, forward_time=0.106, loss_ctc=69.942, loss_att=55.765, acc=0.690, loss=60.018, backward_time=0.751, grad_norm=85.911, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.890e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 13:20:16,333 (trainer:732) INFO: 17epoch:train:8201-8300batch: iter_time=1.102e-04, forward_time=0.113, loss_ctc=69.944, loss_att=53.888, acc=0.702, loss=58.705, backward_time=0.762, grad_norm=93.148, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.887e-05, train_time=2.088 +[gpua003:0/64] 2023-07-06 13:20:50,086 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpua003:0/64] 2023-07-06 13:21:09,066 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 13:21:12,570 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 13:21:12,570 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 13:21:12,576 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 13:25:44,808 (trainer:732) INFO: 17epoch:train:8301-8400batch: iter_time=1.287, forward_time=0.108, loss_ctc=72.637, loss_att=55.648, acc=0.694, loss=60.745, backward_time=0.776, grad_norm=93.234, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.885e-05, train_time=6.569 +[gpua003:0/64] 2023-07-06 13:27:26,151 (trainer:732) INFO: 17epoch:train:8401-8500batch: iter_time=9.020e-05, forward_time=0.108, loss_ctc=71.200, loss_att=66.006, acc=0.691, loss=67.564, backward_time=0.752, grad_norm=84.887, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.882e-05, train_time=2.027 +[gpua003:0/64] 2023-07-06 13:29:06,443 (trainer:732) INFO: 17epoch:train:8501-8600batch: iter_time=9.487e-05, forward_time=0.108, loss_ctc=69.241, loss_att=57.750, acc=0.691, loss=61.197, backward_time=0.752, grad_norm=80.923, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.879e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 13:30:46,540 (trainer:732) INFO: 17epoch:train:8601-8700batch: iter_time=9.387e-05, forward_time=0.108, loss_ctc=71.052, loss_att=54.608, acc=0.696, loss=59.541, backward_time=0.752, grad_norm=81.533, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.876e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 13:32:26,237 (trainer:732) INFO: 17epoch:train:8701-8800batch: iter_time=8.946e-05, forward_time=0.107, loss_ctc=68.519, loss_att=53.218, acc=0.703, loss=57.809, backward_time=0.751, grad_norm=87.296, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.873e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 13:34:06,088 (trainer:732) INFO: 17epoch:train:8801-8900batch: iter_time=9.110e-05, forward_time=0.108, loss_ctc=75.321, loss_att=56.064, acc=0.703, loss=61.841, backward_time=0.752, grad_norm=83.253, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.871e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 13:35:45,773 (trainer:732) INFO: 17epoch:train:8901-9000batch: iter_time=8.413e-05, forward_time=0.107, loss_ctc=71.840, loss_att=52.405, acc=0.690, loss=58.236, backward_time=0.751, grad_norm=98.759, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.868e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:37:25,621 (trainer:732) INFO: 17epoch:train:9001-9100batch: iter_time=8.465e-05, forward_time=0.107, loss_ctc=67.828, loss_att=57.185, acc=0.695, loss=60.378, backward_time=0.753, grad_norm=87.054, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, 
optim0_lr0=8.865e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 13:38:32,627 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 13:38:52,045 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 13:38:55,588 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 13:38:55,588 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 13:38:55,594 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 13:43:25,159 (trainer:732) INFO: 17epoch:train:9101-9200batch: iter_time=1.294, forward_time=0.108, loss_ctc=73.928, loss_att=56.389, acc=0.694, loss=61.651, backward_time=0.764, grad_norm=128.846, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.862e-05, train_time=7.191 +[gpua003:0/64] 2023-07-06 13:45:08,132 (trainer:732) INFO: 17epoch:train:9201-9300batch: iter_time=9.071e-05, forward_time=0.106, loss_ctc=66.150, loss_att=58.093, acc=0.695, loss=60.510, backward_time=0.763, grad_norm=85.331, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.860e-05, train_time=2.059 +[gpua003:0/64] 2023-07-06 13:46:48,580 (trainer:732) INFO: 17epoch:train:9301-9400batch: iter_time=9.128e-05, forward_time=0.106, loss_ctc=69.745, loss_att=63.280, acc=0.688, loss=65.219, backward_time=0.751, grad_norm=101.929, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.857e-05, train_time=2.009 +[gpua003:0/64] 2023-07-06 13:48:37,657 (trainer:732) INFO: 17epoch:train:9401-9500batch: iter_time=8.943e-05, forward_time=0.106, loss_ctc=70.169, loss_att=55.042, acc=0.701, loss=59.580, backward_time=0.766, grad_norm=83.719, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.854e-05, train_time=2.181 +[gpua003:0/64] 2023-07-06 13:50:17,323 (trainer:732) INFO: 17epoch:train:9501-9600batch: iter_time=9.534e-05, forward_time=0.106, loss_ctc=68.339, loss_att=52.207, acc=0.704, loss=57.047, backward_time=0.751, grad_norm=77.700, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.851e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:51:57,215 (trainer:732) INFO: 17epoch:train:9601-9700batch: iter_time=9.239e-05, forward_time=0.106, loss_ctc=78.747, loss_att=60.575, acc=0.695, loss=66.026, backward_time=0.751, grad_norm=98.273, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.848e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 13:53:36,907 (trainer:732) INFO: 17epoch:train:9701-9800batch: iter_time=9.160e-05, forward_time=0.107, loss_ctc=71.624, loss_att=54.061, acc=0.696, loss=59.330, backward_time=0.750, grad_norm=94.224, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.846e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 13:55:16,555 (trainer:732) INFO: 17epoch:train:9801-9900batch: iter_time=8.975e-05, forward_time=0.106, loss_ctc=70.198, loss_att=52.936, 
acc=0.705, loss=58.115, backward_time=0.751, grad_norm=84.449, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.843e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:56:56,201 (trainer:732) INFO: 17epoch:train:9901-10000batch: iter_time=8.856e-05, forward_time=0.107, loss_ctc=71.418, loss_att=58.211, acc=0.687, loss=62.173, backward_time=0.750, grad_norm=99.355, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.840e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 14:09:08,370 (trainer:338) INFO: 17epoch results: [train] iter_time=0.192, forward_time=0.112, loss_ctc=72.034, loss_att=57.326, acc=0.690, loss=61.738, backward_time=0.758, grad_norm=91.386, clip=100.000, loss_scale=2.342e+16, optim_step_time=0.113, optim0_lr0=8.981e-05, train_time=2.678, time=3 hours, 43 minutes and 23.37 seconds, total_count=140000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=58.336, cer_ctc=0.306, loss_att=49.239, acc=0.657, cer=0.353, wer=0.988, loss=51.968, time=5 minutes and 55.68 seconds, total_count=14674, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 5.85 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-06 14:09:27,436 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-06 14:09:27,626 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/12epoch.pth +[gpua003:0/64] 2023-07-06 14:09:27,714 (trainer:272) INFO: 18/100epoch started. Estimated time to finish: 1 week, 6 days and 8 hours +[gpua003:0/64] 2023-07-06 14:09:29,063 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua003:0/64] 2023-07-06 14:09:48,025 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 14:09:53,240 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 14:09:53,240 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 14:09:53,338 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 14:19:30,835 (trainer:732) INFO: 18epoch:train:1-100batch: iter_time=4.929, forward_time=0.153, loss_ctc=76.115, loss_att=63.275, acc=0.682, loss=67.127, backward_time=0.773, grad_norm=114.970, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.116, optim0_lr0=8.837e-05, train_time=12.048 +[gpua003:0/64] 2023-07-06 14:21:10,892 (trainer:732) INFO: 18epoch:train:101-200batch: iter_time=1.040e-04, forward_time=0.108, loss_ctc=78.407, loss_att=64.441, acc=0.685, loss=68.631, backward_time=0.752, grad_norm=128.032, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.835e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 14:22:53,009 (trainer:732) INFO: 18epoch:train:201-300batch: iter_time=9.913e-05, forward_time=0.108, loss_ctc=66.363, loss_att=49.349, acc=0.706, loss=54.453, backward_time=0.751, grad_norm=82.992, 
clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.832e-05, train_time=2.042 +[gpua003:0/64] 2023-07-06 14:24:32,622 (trainer:732) INFO: 18epoch:train:301-400batch: iter_time=8.736e-05, forward_time=0.107, loss_ctc=85.395, loss_att=58.982, acc=0.692, loss=66.906, backward_time=0.752, grad_norm=105.306, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.829e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 14:26:14,069 (trainer:732) INFO: 18epoch:train:401-500batch: iter_time=8.796e-05, forward_time=0.107, loss_ctc=74.067, loss_att=58.678, acc=0.674, loss=63.295, backward_time=0.752, grad_norm=98.618, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.826e-05, train_time=2.029 +[gpua003:0/64] 2023-07-06 14:27:54,124 (trainer:732) INFO: 18epoch:train:501-600batch: iter_time=9.281e-05, forward_time=0.107, loss_ctc=85.710, loss_att=69.303, acc=0.665, loss=74.225, backward_time=0.753, grad_norm=104.959, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.824e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 14:29:33,936 (trainer:732) INFO: 18epoch:train:601-700batch: iter_time=8.948e-05, forward_time=0.107, loss_ctc=68.980, loss_att=52.042, acc=0.680, loss=57.123, backward_time=0.752, grad_norm=124.515, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.821e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 14:31:21,706 (trainer:732) INFO: 18epoch:train:701-800batch: iter_time=3.960e-04, forward_time=0.142, loss_ctc=86.717, loss_att=65.821, acc=0.661, loss=72.090, backward_time=0.767, grad_norm=105.973, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.818e-05, train_time=2.152 +[gpua003:0/64] 2023-07-06 14:32:09,857 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpua003:0/64] 2023-07-06 14:32:28,641 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 14:32:32,360 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 14:32:32,360 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 14:32:32,366 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 14:36:07,379 (trainer:732) INFO: 18epoch:train:801-900batch: iter_time=1.622, forward_time=0.152, loss_ctc=74.970, loss_att=58.171, acc=0.683, loss=63.211, backward_time=0.778, grad_norm=89.744, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.115, optim0_lr0=8.815e-05, train_time=5.717 +[gpua003:0/64] 2023-07-06 14:37:47,747 (trainer:732) INFO: 18epoch:train:901-1000batch: iter_time=9.918e-05, forward_time=0.108, loss_ctc=75.573, loss_att=63.432, acc=0.686, loss=67.074, backward_time=0.752, grad_norm=104.391, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.813e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 14:39:27,570 (trainer:732) INFO: 18epoch:train:1001-1100batch: iter_time=9.562e-05, forward_time=0.108, loss_ctc=71.935, loss_att=56.850, acc=0.696, loss=61.375, backward_time=0.751, grad_norm=93.037, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.810e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 14:41:07,498 (trainer:732) INFO: 18epoch:train:1101-1200batch: iter_time=1.016e-04, forward_time=0.109, loss_ctc=73.347, loss_att=52.973, acc=0.703, loss=59.086, backward_time=0.751, grad_norm=99.082, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.807e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 14:42:47,249 (trainer:732) INFO: 18epoch:train:1201-1300batch: iter_time=9.793e-05, forward_time=0.108, loss_ctc=83.048, loss_att=61.707, acc=0.686, loss=68.110, backward_time=0.750, grad_norm=102.821, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.804e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 14:44:27,022 (trainer:732) INFO: 18epoch:train:1301-1400batch: iter_time=1.028e-04, forward_time=0.108, loss_ctc=78.082, loss_att=59.145, acc=0.672, loss=64.826, backward_time=0.751, grad_norm=120.118, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.802e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 14:46:06,792 (trainer:732) INFO: 18epoch:train:1401-1500batch: iter_time=1.063e-04, forward_time=0.109, loss_ctc=77.767, loss_att=61.606, acc=0.674, loss=66.454, backward_time=0.752, grad_norm=122.959, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.799e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 14:47:46,697 (trainer:732) INFO: 18epoch:train:1501-1600batch: iter_time=9.485e-05, forward_time=0.110, loss_ctc=73.758, loss_att=56.709, acc=0.684, loss=61.824, backward_time=0.753, grad_norm=95.013, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.796e-05, 
train_time=1.998 +[gpua003:0/64] 2023-07-06 14:48:53,758 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-06 14:49:13,084 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 14:49:16,877 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 14:49:16,877 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 14:49:16,883 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 14:53:39,624 (trainer:732) INFO: 18epoch:train:1601-1700batch: iter_time=1.292, forward_time=0.109, loss_ctc=77.398, loss_att=58.944, acc=0.666, loss=64.480, backward_time=0.761, grad_norm=104.361, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.794e-05, train_time=7.058 +[gpua003:0/64] 2023-07-06 14:55:19,937 (trainer:732) INFO: 18epoch:train:1701-1800batch: iter_time=1.040e-04, forward_time=0.108, loss_ctc=72.292, loss_att=58.658, acc=0.692, loss=62.748, backward_time=0.754, grad_norm=90.979, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.791e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 14:57:00,068 (trainer:732) INFO: 18epoch:train:1801-1900batch: iter_time=9.482e-05, forward_time=0.108, loss_ctc=75.435, loss_att=59.110, acc=0.697, loss=64.008, backward_time=0.753, grad_norm=94.982, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.788e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 14:58:41,886 (trainer:732) INFO: 18epoch:train:1901-2000batch: iter_time=8.814e-05, forward_time=0.108, loss_ctc=66.463, loss_att=50.107, acc=0.714, loss=55.014, backward_time=0.753, grad_norm=94.405, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.785e-05, train_time=2.036 +[gpua003:0/64] 2023-07-06 15:00:21,511 (trainer:732) INFO: 18epoch:train:2001-2100batch: iter_time=8.940e-05, forward_time=0.108, loss_ctc=87.948, loss_att=64.112, acc=0.676, loss=71.263, backward_time=0.751, grad_norm=117.699, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.783e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 15:02:01,222 (trainer:732) INFO: 18epoch:train:2101-2200batch: iter_time=8.983e-05, forward_time=0.108, loss_ctc=78.447, loss_att=60.484, acc=0.681, loss=65.873, backward_time=0.752, grad_norm=100.916, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.780e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 15:03:48,764 (trainer:732) INFO: 18epoch:train:2201-2300batch: iter_time=9.119e-05, forward_time=0.107, loss_ctc=79.251, loss_att=62.785, acc=0.668, loss=67.725, backward_time=0.760, grad_norm=93.330, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.777e-05, train_time=2.151 +[gpua003:0/64] 2023-07-06 15:05:58,569 (trainer:732) INFO: 18epoch:train:2301-2400batch: iter_time=8.569e-05, forward_time=0.108, loss_ctc=66.068, loss_att=48.122, acc=0.690, loss=53.506, 
backward_time=0.810, grad_norm=98.868, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.775e-05, train_time=2.596 +[gpua003:0/64] 2023-07-06 15:08:21,871 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 15:08:41,122 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 15:08:44,919 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 15:08:44,919 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 15:08:44,925 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 15:11:51,642 (trainer:732) INFO: 18epoch:train:2401-2500batch: iter_time=1.318, forward_time=0.127, loss_ctc=84.958, loss_att=62.041, acc=0.667, loss=68.916, backward_time=0.850, grad_norm=104.647, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.772e-05, train_time=7.061 +[gpua003:0/64] 2023-07-06 15:13:33,609 (trainer:732) INFO: 18epoch:train:2501-2600batch: iter_time=1.008e-04, forward_time=0.108, loss_ctc=75.155, loss_att=62.287, acc=0.691, loss=66.148, backward_time=0.764, grad_norm=96.943, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.769e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 15:15:13,429 (trainer:732) INFO: 18epoch:train:2601-2700batch: iter_time=1.057e-04, forward_time=0.108, loss_ctc=76.932, loss_att=63.507, acc=0.694, loss=67.535, backward_time=0.750, grad_norm=96.917, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.766e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 15:16:53,331 (trainer:732) INFO: 18epoch:train:2701-2800batch: iter_time=1.094e-04, forward_time=0.108, loss_ctc=62.411, loss_att=46.895, acc=0.715, loss=51.550, backward_time=0.752, grad_norm=92.496, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.764e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 15:18:33,060 (trainer:732) INFO: 18epoch:train:2801-2900batch: iter_time=9.940e-05, forward_time=0.108, loss_ctc=81.354, loss_att=57.151, acc=0.696, loss=64.412, backward_time=0.751, grad_norm=109.033, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.761e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 15:20:12,823 (trainer:732) INFO: 18epoch:train:2901-3000batch: iter_time=1.129e-04, forward_time=0.108, loss_ctc=73.741, loss_att=58.586, acc=0.679, loss=63.132, backward_time=0.752, grad_norm=91.478, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.758e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:21:52,679 (trainer:732) INFO: 18epoch:train:3001-3100batch: iter_time=1.015e-04, forward_time=0.109, loss_ctc=83.727, loss_att=65.874, acc=0.669, loss=71.230, backward_time=0.752, grad_norm=106.440, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.756e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 15:23:32,451 (trainer:732) INFO: 
18epoch:train:3101-3200batch: iter_time=1.030e-04, forward_time=0.109, loss_ctc=67.147, loss_att=49.955, acc=0.689, loss=55.113, backward_time=0.751, grad_norm=107.665, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.753e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:25:14,937 (trainer:732) INFO: 18epoch:train:3201-3300batch: iter_time=9.253e-05, forward_time=0.108, loss_ctc=81.896, loss_att=61.574, acc=0.673, loss=67.671, backward_time=0.753, grad_norm=105.660, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.750e-05, train_time=2.049 +[gpua003:0/64] 2023-07-06 15:25:48,262 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 15:26:07,548 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 15:26:11,328 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 15:26:11,328 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 15:26:11,334 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 15:29:42,037 (trainer:732) INFO: 18epoch:train:3301-3400batch: iter_time=1.323, forward_time=0.110, loss_ctc=72.978, loss_att=56.716, acc=0.686, loss=61.595, backward_time=0.775, grad_norm=91.640, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.748e-05, train_time=5.342 +[gpua003:0/64] 2023-07-06 15:31:22,262 (trainer:732) INFO: 18epoch:train:3401-3500batch: iter_time=1.067e-04, forward_time=0.108, loss_ctc=72.957, loss_att=62.528, acc=0.685, loss=65.657, backward_time=0.753, grad_norm=88.960, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.745e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 15:33:02,184 (trainer:732) INFO: 18epoch:train:3501-3600batch: iter_time=1.066e-04, forward_time=0.109, loss_ctc=67.727, loss_att=54.922, acc=0.697, loss=58.763, backward_time=0.753, grad_norm=96.147, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.742e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 15:34:41,830 (trainer:732) INFO: 18epoch:train:3601-3700batch: iter_time=1.085e-04, forward_time=0.108, loss_ctc=72.865, loss_att=53.783, acc=0.699, loss=59.507, backward_time=0.752, grad_norm=105.810, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.740e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 15:36:21,533 (trainer:732) INFO: 18epoch:train:3701-3800batch: iter_time=1.178e-04, forward_time=0.109, loss_ctc=78.415, loss_att=59.688, acc=0.681, loss=65.306, backward_time=0.752, grad_norm=94.163, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.737e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 15:38:01,291 (trainer:732) INFO: 18epoch:train:3801-3900batch: iter_time=1.163e-04, forward_time=0.109, loss_ctc=74.930, loss_att=57.824, acc=0.677, loss=62.956, backward_time=0.752, grad_norm=98.833, clip=100.000, loss_scale=7.206e+16, 
optim_step_time=0.114, optim0_lr0=8.734e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:39:41,058 (trainer:732) INFO: 18epoch:train:3901-4000batch: iter_time=1.116e-04, forward_time=0.109, loss_ctc=73.796, loss_att=59.813, acc=0.674, loss=64.008, backward_time=0.753, grad_norm=105.954, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.732e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:41:20,935 (trainer:732) INFO: 18epoch:train:4001-4100batch: iter_time=9.829e-05, forward_time=0.109, loss_ctc=71.016, loss_att=54.671, acc=0.686, loss=59.574, backward_time=0.753, grad_norm=88.325, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.729e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 15:42:27,084 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 15:42:46,118 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 15:42:49,698 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 15:42:49,698 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 15:42:49,704 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 15:46:38,959 (trainer:732) INFO: 18epoch:train:4101-4200batch: iter_time=1.311, forward_time=0.109, loss_ctc=82.633, loss_att=57.106, acc=0.674, loss=64.764, backward_time=0.765, grad_norm=108.721, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.726e-05, train_time=6.360 +[gpua003:0/64] 2023-07-06 15:48:19,686 (trainer:732) INFO: 18epoch:train:4201-4300batch: iter_time=9.726e-05, forward_time=0.109, loss_ctc=74.521, loss_att=62.322, acc=0.689, loss=65.982, backward_time=0.757, grad_norm=96.759, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.724e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 15:49:59,357 (trainer:732) INFO: 18epoch:train:4301-4400batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=68.417, loss_att=54.908, acc=0.700, loss=58.961, backward_time=0.751, grad_norm=87.929, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.721e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 15:51:39,010 (trainer:732) INFO: 18epoch:train:4401-4500batch: iter_time=1.019e-04, forward_time=0.108, loss_ctc=72.963, loss_att=54.105, acc=0.704, loss=59.763, backward_time=0.750, grad_norm=121.911, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.718e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 15:53:18,767 (trainer:732) INFO: 18epoch:train:4501-4600batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=78.466, loss_att=59.773, acc=0.690, loss=65.381, backward_time=0.751, grad_norm=96.401, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.716e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:54:58,498 (trainer:732) INFO: 18epoch:train:4601-4700batch: iter_time=1.004e-04, forward_time=0.108, 
loss_ctc=76.432, loss_att=58.796, acc=0.677, loss=64.087, backward_time=0.752, grad_norm=97.092, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.713e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 15:56:38,047 (trainer:732) INFO: 18epoch:train:4701-4800batch: iter_time=9.499e-05, forward_time=0.108, loss_ctc=79.331, loss_att=63.852, acc=0.665, loss=68.495, backward_time=0.750, grad_norm=256.176, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.710e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 15:58:19,964 (trainer:732) INFO: 18epoch:train:4801-4900batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=68.325, loss_att=52.879, acc=0.682, loss=57.513, backward_time=0.751, grad_norm=118.723, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.708e-05, train_time=2.038 +[gpua003:0/64] 2023-07-06 16:00:00,347 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 16:00:19,563 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 16:00:23,071 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 16:00:23,071 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 16:00:23,077 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 16:03:54,812 (trainer:732) INFO: 18epoch:train:4901-5000batch: iter_time=1.281, forward_time=0.108, loss_ctc=83.769, loss_att=60.724, acc=0.667, loss=67.638, backward_time=0.761, grad_norm=148.559, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.705e-05, train_time=6.697 +[gpua003:0/64] 2023-07-06 16:05:37,501 (trainer:732) INFO: 18epoch:train:5001-5100batch: iter_time=9.821e-05, forward_time=0.108, loss_ctc=74.654, loss_att=62.672, acc=0.686, loss=66.267, backward_time=0.760, grad_norm=88.103, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.702e-05, train_time=2.054 +[gpua003:0/64] 2023-07-06 16:07:17,638 (trainer:732) INFO: 18epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=74.717, loss_att=61.342, acc=0.690, loss=65.354, backward_time=0.751, grad_norm=111.773, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.700e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 16:08:57,432 (trainer:732) INFO: 18epoch:train:5201-5300batch: iter_time=1.039e-04, forward_time=0.109, loss_ctc=62.277, loss_att=47.038, acc=0.711, loss=51.609, backward_time=0.752, grad_norm=77.470, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.697e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 16:10:37,101 (trainer:732) INFO: 18epoch:train:5301-5400batch: iter_time=1.087e-04, forward_time=0.108, loss_ctc=81.357, loss_att=56.958, acc=0.687, loss=64.278, backward_time=0.751, grad_norm=148.292, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.695e-05, train_time=1.993 
+[gpua003:0/64] 2023-07-06 16:12:16,859 (trainer:732) INFO: 18epoch:train:5401-5500batch: iter_time=1.099e-04, forward_time=0.109, loss_ctc=73.380, loss_att=57.785, acc=0.682, loss=62.463, backward_time=0.751, grad_norm=91.302, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.692e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 16:13:56,539 (trainer:732) INFO: 18epoch:train:5501-5600batch: iter_time=1.039e-04, forward_time=0.108, loss_ctc=81.378, loss_att=65.579, acc=0.665, loss=70.319, backward_time=0.751, grad_norm=99.738, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.689e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 16:15:35,974 (trainer:732) INFO: 18epoch:train:5601-5700batch: iter_time=1.127e-04, forward_time=0.108, loss_ctc=65.790, loss_att=48.674, acc=0.695, loss=53.808, backward_time=0.749, grad_norm=111.144, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.687e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 16:17:15,561 (trainer:732) INFO: 18epoch:train:5701-5800batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=82.875, loss_att=60.270, acc=0.669, loss=67.051, backward_time=0.750, grad_norm=114.126, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.684e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 16:17:48,761 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 16:18:08,112 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 16:18:11,617 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 16:18:11,617 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 16:18:11,623 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 16:22:38,235 (trainer:732) INFO: 18epoch:train:5801-5900batch: iter_time=1.313, forward_time=0.109, loss_ctc=73.660, loss_att=59.347, acc=0.685, loss=63.641, backward_time=0.764, grad_norm=132.057, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.681e-05, train_time=6.453 +[gpua003:0/64] 2023-07-06 16:24:18,781 (trainer:732) INFO: 18epoch:train:5901-6000batch: iter_time=1.002e-04, forward_time=0.109, loss_ctc=76.119, loss_att=58.539, acc=0.690, loss=63.813, backward_time=0.754, grad_norm=88.467, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.679e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 16:26:06,138 (trainer:732) INFO: 18epoch:train:6001-6100batch: iter_time=9.305e-05, forward_time=0.109, loss_ctc=67.971, loss_att=52.451, acc=0.704, loss=57.107, backward_time=0.766, grad_norm=95.395, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.676e-05, train_time=2.147 +[gpua003:0/64] 2023-07-06 16:27:49,192 (trainer:732) INFO: 18epoch:train:6101-6200batch: iter_time=9.965e-05, forward_time=0.108, loss_ctc=77.390, loss_att=55.435, acc=0.689, loss=62.021, 
backward_time=0.756, grad_norm=106.144, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.674e-05, train_time=2.061 +[gpua003:0/64] 2023-07-06 16:29:29,408 (trainer:732) INFO: 18epoch:train:6201-6300batch: iter_time=9.552e-05, forward_time=0.109, loss_ctc=70.826, loss_att=56.281, acc=0.684, loss=60.644, backward_time=0.753, grad_norm=94.037, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.671e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 16:31:09,234 (trainer:732) INFO: 18epoch:train:6301-6400batch: iter_time=9.346e-05, forward_time=0.107, loss_ctc=78.001, loss_att=63.797, acc=0.672, loss=68.058, backward_time=0.751, grad_norm=98.998, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.668e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 16:32:48,924 (trainer:732) INFO: 18epoch:train:6401-6500batch: iter_time=9.280e-05, forward_time=0.108, loss_ctc=72.916, loss_att=53.940, acc=0.675, loss=59.633, backward_time=0.751, grad_norm=86.057, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.666e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 16:34:29,082 (trainer:732) INFO: 18epoch:train:6501-6600batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=74.326, loss_att=55.265, acc=0.677, loss=60.983, backward_time=0.752, grad_norm=94.716, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.663e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 16:35:38,397 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 16:35:57,667 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 16:36:01,099 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 16:36:01,100 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 16:36:01,106 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 16:41:32,144 (trainer:732) INFO: 18epoch:train:6601-6700batch: iter_time=1.282, forward_time=0.107, loss_ctc=83.492, loss_att=58.963, acc=0.681, loss=66.321, backward_time=0.770, grad_norm=107.633, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.661e-05, train_time=8.461 +[gpua003:0/64] 2023-07-06 16:43:17,374 (trainer:732) INFO: 18epoch:train:6701-6800batch: iter_time=9.000e-05, forward_time=0.107, loss_ctc=72.670, loss_att=61.072, acc=0.690, loss=64.551, backward_time=0.767, grad_norm=93.284, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.658e-05, train_time=2.104 +[gpua003:0/64] 2023-07-06 16:44:57,475 (trainer:732) INFO: 18epoch:train:6801-6900batch: iter_time=8.929e-05, forward_time=0.107, loss_ctc=68.519, loss_att=54.104, acc=0.697, loss=58.428, backward_time=0.751, grad_norm=89.012, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.655e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 16:46:44,014 (trainer:732) INFO: 
18epoch:train:6901-7000batch: iter_time=8.633e-05, forward_time=0.107, loss_ctc=73.189, loss_att=52.360, acc=0.706, loss=58.609, backward_time=0.755, grad_norm=131.163, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.653e-05, train_time=2.131 +[gpua003:0/64] 2023-07-06 16:48:38,048 (trainer:732) INFO: 18epoch:train:7001-7100batch: iter_time=9.301e-05, forward_time=0.107, loss_ctc=76.911, loss_att=60.205, acc=0.681, loss=65.217, backward_time=0.775, grad_norm=91.195, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.650e-05, train_time=2.280 +[gpua003:0/64] 2023-07-06 16:50:17,837 (trainer:732) INFO: 18epoch:train:7101-7200batch: iter_time=9.519e-05, forward_time=0.107, loss_ctc=73.833, loss_att=56.759, acc=0.685, loss=61.881, backward_time=0.750, grad_norm=97.936, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.648e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 16:51:58,408 (trainer:732) INFO: 18epoch:train:7201-7300batch: iter_time=9.376e-05, forward_time=0.107, loss_ctc=76.637, loss_att=62.319, acc=0.667, loss=66.614, backward_time=0.752, grad_norm=113.058, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.645e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 16:54:05,563 (trainer:732) INFO: 18epoch:train:7301-7400batch: iter_time=1.831e-04, forward_time=0.124, loss_ctc=68.742, loss_att=52.246, acc=0.684, loss=57.195, backward_time=0.816, grad_norm=109.431, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.119, optim0_lr0=8.642e-05, train_time=2.543 +[gpua003:0/64] 2023-07-06 16:55:47,933 (trainer:732) INFO: 18epoch:train:7401-7500batch: iter_time=9.365e-05, forward_time=0.110, loss_ctc=82.184, loss_att=57.189, acc=0.675, loss=64.687, backward_time=0.760, grad_norm=142.708, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.640e-05, train_time=2.047 +[gpua003:0/64] 2023-07-06 16:55:54,106 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-06 16:56:13,402 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 16:56:16,909 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 16:56:16,910 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 16:56:16,916 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 17:00:34,079 (trainer:732) INFO: 18epoch:train:7501-7600batch: iter_time=1.505, forward_time=0.108, loss_ctc=74.300, loss_att=63.152, acc=0.683, loss=66.497, backward_time=0.765, grad_norm=141.163, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.637e-05, train_time=5.723 +[gpua003:0/64] 2023-07-06 17:02:15,733 (trainer:732) INFO: 18epoch:train:7601-7700batch: iter_time=9.276e-05, forward_time=0.109, loss_ctc=75.341, loss_att=59.650, acc=0.691, loss=64.357, backward_time=0.757, grad_norm=131.218, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.635e-05, train_time=2.033 +[gpua003:0/64] 2023-07-06 17:04:00,532 (trainer:732) INFO: 18epoch:train:7701-7800batch: iter_time=9.513e-05, forward_time=0.108, loss_ctc=62.710, loss_att=47.592, acc=0.714, loss=52.128, backward_time=0.758, grad_norm=100.496, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.632e-05, train_time=2.096 +[gpua003:0/64] 2023-07-06 17:05:41,261 (trainer:732) INFO: 18epoch:train:7801-7900batch: iter_time=1.024e-04, forward_time=0.109, loss_ctc=80.778, loss_att=56.500, acc=0.691, loss=63.783, backward_time=0.753, grad_norm=159.514, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.630e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 17:07:23,370 (trainer:732) INFO: 18epoch:train:7901-8000batch: iter_time=9.871e-05, forward_time=0.109, loss_ctc=72.426, loss_att=57.186, acc=0.677, loss=61.758, backward_time=0.755, grad_norm=100.381, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.627e-05, train_time=2.042 +[gpua003:0/64] 2023-07-06 17:09:03,079 (trainer:732) INFO: 18epoch:train:8001-8100batch: iter_time=9.808e-05, forward_time=0.108, loss_ctc=81.683, loss_att=65.407, acc=0.673, loss=70.289, backward_time=0.751, grad_norm=103.619, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.624e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 17:10:54,113 (trainer:732) INFO: 18epoch:train:8101-8200batch: iter_time=1.053e-04, forward_time=0.108, loss_ctc=64.899, loss_att=47.680, acc=0.696, loss=52.846, backward_time=0.765, grad_norm=84.417, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.622e-05, train_time=2.220 +[gpua003:0/64] 2023-07-06 17:12:33,937 (trainer:732) INFO: 18epoch:train:8201-8300batch: iter_time=9.519e-05, forward_time=0.107, loss_ctc=80.088, loss_att=59.873, acc=0.669, loss=65.937, backward_time=0.751, grad_norm=140.365, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, 
optim0_lr0=8.619e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 17:13:08,280 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 17:13:27,649 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 17:13:31,147 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 17:13:31,147 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 17:13:31,153 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 17:18:59,187 (trainer:732) INFO: 18epoch:train:8301-8400batch: iter_time=1.294, forward_time=0.107, loss_ctc=72.616, loss_att=56.151, acc=0.688, loss=61.090, backward_time=0.763, grad_norm=159.886, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.617e-05, train_time=7.705 +[gpua003:0/64] 2023-07-06 17:20:40,408 (trainer:732) INFO: 18epoch:train:8401-8500batch: iter_time=1.002e-04, forward_time=0.108, loss_ctc=72.837, loss_att=60.069, acc=0.694, loss=63.899, backward_time=0.752, grad_norm=118.934, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.614e-05, train_time=2.024 +[gpua003:0/64] 2023-07-06 17:22:20,274 (trainer:732) INFO: 18epoch:train:8501-8600batch: iter_time=9.768e-05, forward_time=0.108, loss_ctc=68.421, loss_att=53.892, acc=0.698, loss=58.250, backward_time=0.752, grad_norm=96.100, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.612e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 17:24:14,320 (trainer:732) INFO: 18epoch:train:8601-8700batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=68.857, loss_att=51.084, acc=0.707, loss=56.416, backward_time=0.768, grad_norm=78.940, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.609e-05, train_time=2.281 +[gpua003:0/64] 2023-07-06 17:25:54,219 (trainer:732) INFO: 18epoch:train:8701-8800batch: iter_time=1.021e-04, forward_time=0.108, loss_ctc=79.583, loss_att=60.458, acc=0.683, loss=66.195, backward_time=0.752, grad_norm=99.339, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.607e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 17:27:34,084 (trainer:732) INFO: 18epoch:train:8801-8900batch: iter_time=9.920e-05, forward_time=0.108, loss_ctc=74.773, loss_att=57.128, acc=0.683, loss=62.421, backward_time=0.753, grad_norm=91.643, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.604e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 17:29:13,700 (trainer:732) INFO: 18epoch:train:8901-9000batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=74.198, loss_att=58.596, acc=0.679, loss=63.277, backward_time=0.750, grad_norm=131.138, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.601e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 17:30:53,462 (trainer:732) INFO: 18epoch:train:9001-9100batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=72.289, loss_att=54.298, 
acc=0.688, loss=59.695, backward_time=0.750, grad_norm=98.942, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.599e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 17:32:00,571 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 17:32:19,636 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 17:32:23,155 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 17:32:23,155 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 17:32:23,161 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 17:36:57,445 (trainer:732) INFO: 18epoch:train:9101-9200batch: iter_time=1.310, forward_time=0.109, loss_ctc=81.340, loss_att=56.298, acc=0.680, loss=63.811, backward_time=0.761, grad_norm=111.662, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.596e-05, train_time=7.279 +[gpua003:0/64] 2023-07-06 17:38:39,715 (trainer:732) INFO: 18epoch:train:9201-9300batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=73.285, loss_att=63.161, acc=0.686, loss=66.198, backward_time=0.757, grad_norm=88.279, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.594e-05, train_time=2.045 +[gpua003:0/64] 2023-07-06 17:40:23,113 (trainer:732) INFO: 18epoch:train:9301-9400batch: iter_time=1.137e-04, forward_time=0.109, loss_ctc=68.800, loss_att=55.601, acc=0.706, loss=59.561, backward_time=0.756, grad_norm=92.752, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.591e-05, train_time=2.068 +[gpua003:0/64] 2023-07-06 17:42:05,313 (trainer:732) INFO: 18epoch:train:9401-9500batch: iter_time=1.016e-04, forward_time=0.108, loss_ctc=72.284, loss_att=52.694, acc=0.710, loss=58.571, backward_time=0.754, grad_norm=98.040, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.589e-05, train_time=2.044 +[gpua003:0/64] 2023-07-06 17:43:45,031 (trainer:732) INFO: 18epoch:train:9501-9600batch: iter_time=1.102e-04, forward_time=0.109, loss_ctc=76.407, loss_att=59.152, acc=0.693, loss=64.328, backward_time=0.751, grad_norm=111.122, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.586e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 17:45:24,576 (trainer:732) INFO: 18epoch:train:9601-9700batch: iter_time=1.125e-04, forward_time=0.107, loss_ctc=74.718, loss_att=58.743, acc=0.682, loss=63.535, backward_time=0.750, grad_norm=102.896, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.584e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 17:47:08,153 (trainer:732) INFO: 18epoch:train:9701-9800batch: iter_time=1.040e-04, forward_time=0.107, loss_ctc=76.130, loss_att=62.394, acc=0.672, loss=66.515, backward_time=0.752, grad_norm=99.445, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.581e-05, train_time=2.071 +[gpua003:0/64] 2023-07-06 17:48:48,960 
(trainer:732) INFO: 18epoch:train:9801-9900batch: iter_time=9.027e-05, forward_time=0.108, loss_ctc=67.693, loss_att=51.247, acc=0.689, loss=56.181, backward_time=0.758, grad_norm=101.173, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.579e-05, train_time=2.016 +[gpua003:0/64] 2023-07-06 17:50:28,839 (trainer:732) INFO: 18epoch:train:9901-10000batch: iter_time=8.743e-05, forward_time=0.109, loss_ctc=81.308, loss_att=58.352, acc=0.679, loss=65.239, backward_time=0.752, grad_norm=97.815, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.576e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 18:02:52,400 (trainer:338) INFO: 18epoch results: [train] iter_time=0.198, forward_time=0.110, loss_ctc=75.020, loss_att=57.840, acc=0.686, loss=62.994, backward_time=0.757, grad_norm=106.540, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.705e-05, train_time=2.652, time=3 hours, 41 minutes and 15.98 seconds, total_count=150000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.528, cer_ctc=0.288, loss_att=42.295, acc=0.657, cer=0.376, wer=0.987, loss=44.765, time=5 minutes and 47.09 seconds, total_count=15686, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 21.52 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-06 18:03:10,995 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-06 18:03:11,099 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/13epoch.pth +[gpua003:0/64] 2023-07-06 18:03:11,142 (trainer:272) INFO: 19/100epoch started. Estimated time to finish: 1 week, 6 days and 5 hours +[gpua003:0/64] 2023-07-06 18:03:12,483 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
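A note for readers skimming the trainer:732 records above: each one reports the CTC branch loss (loss_ctc), the attention-decoder loss (loss_att), and a combined loss, and the logged numbers are consistent with the usual hybrid CTC/attention interpolation loss = w * loss_ctc + (1 - w) * loss_att with w ~ 0.3 (e.g. 0.3 * 75.341 + 0.7 * 59.650 = 64.357). The weight itself is never printed in this log, so treat 0.3 as an inference (presumably a ctc_weight-style option in the training config). A minimal sketch that re-derives it from one record:

```python
# Sketch: recover the CTC/attention interpolation weight from a logged record.
# Assumes loss = w * loss_ctc + (1 - w) * loss_att, the standard hybrid objective.

def implied_ctc_weight(loss_ctc: float, loss_att: float, loss: float) -> float:
    """Solve loss = w * loss_ctc + (1 - w) * loss_att for w."""
    return (loss - loss_att) / (loss_ctc - loss_att)

# Values copied from the 18epoch:train:7601-7700batch record above.
w = implied_ctc_weight(loss_ctc=75.341, loss_att=59.650, loss=64.357)
assert abs(w - 0.3) < 1e-3
print(f"implied ctc weight ~ {w:.3f}")  # -> 0.300
```

The epoch bookkeeping above is also easy to sanity-check: epoch 18 took about 3 h 41 min of training plus roughly 12 min of validation and attention plotting, and 82 remaining epochs at ~3.9 h each is ~13.2 days, matching the logged "1 week, 6 days and 5 hours". The "best model has been updated: valid.total_count" / "model files were removed: ...13epoch.pth" pair is the trainer's keep-n-best bookkeeping: checkpoints that no longer qualify under any retention criterion are pruned.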
+[gpua003:0/64] 2023-07-06 18:03:31,414 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 18:03:36,058 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 18:03:36,058 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 18:03:36,157 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 18:10:33,322 (trainer:732) INFO: 19epoch:train:1-100batch: iter_time=3.360, forward_time=0.134, loss_ctc=71.878, loss_att=52.487, acc=0.687, loss=58.305, backward_time=0.768, grad_norm=98.210, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.115, optim0_lr0=8.574e-05, train_time=8.830 +[gpua003:0/64] 2023-07-06 18:12:13,736 (trainer:732) INFO: 19epoch:train:101-200batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=73.951, loss_att=54.611, acc=0.685, loss=60.413, backward_time=0.752, grad_norm=97.472, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.571e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 18:13:54,017 (trainer:732) INFO: 19epoch:train:201-300batch: iter_time=9.643e-05, forward_time=0.108, loss_ctc=71.382, loss_att=53.617, acc=0.688, loss=58.947, backward_time=0.750, grad_norm=79.712, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.569e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 18:15:36,625 (trainer:732) INFO: 19epoch:train:301-400batch: iter_time=9.724e-05, forward_time=0.108, loss_ctc=75.527, loss_att=61.014, acc=0.673, loss=65.368, backward_time=0.757, grad_norm=94.346, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.566e-05, train_time=2.052 +[gpua003:0/64] 2023-07-06 18:17:17,089 (trainer:732) INFO: 19epoch:train:401-500batch: iter_time=9.887e-05, forward_time=0.109, loss_ctc=73.023, loss_att=58.274, acc=0.684, loss=62.699, backward_time=0.751, grad_norm=90.553, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.564e-05, train_time=2.009 +[gpua003:0/64] 2023-07-06 18:19:01,828 (trainer:732) INFO: 19epoch:train:501-600batch: iter_time=8.940e-05, forward_time=0.108, loss_ctc=71.310, loss_att=57.750, acc=0.697, loss=61.818, backward_time=0.756, grad_norm=100.266, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.561e-05, train_time=2.095 +[gpua003:0/64] 2023-07-06 18:21:04,708 (trainer:732) INFO: 19epoch:train:601-700batch: iter_time=9.789e-05, forward_time=0.110, loss_ctc=68.405, loss_att=52.910, acc=0.711, loss=57.558, backward_time=0.796, grad_norm=82.647, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.559e-05, train_time=2.457 +[gpua003:0/64] 2023-07-06 18:22:58,725 (trainer:732) INFO: 19epoch:train:701-800batch: iter_time=1.057e-04, forward_time=0.111, loss_ctc=85.051, loss_att=69.937, acc=0.676, loss=74.471, backward_time=0.799, grad_norm=123.148, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.556e-05, 
train_time=2.280 +[gpua003:0/64] 2023-07-06 18:23:39,249 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 18:23:57,976 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 18:24:01,673 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 18:24:01,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 18:24:01,679 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 18:27:57,894 (trainer:732) INFO: 19epoch:train:801-900batch: iter_time=1.349, forward_time=0.109, loss_ctc=74.949, loss_att=56.631, acc=0.679, loss=62.126, backward_time=0.766, grad_norm=85.017, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.553e-05, train_time=5.983 +[gpua003:0/64] 2023-07-06 18:29:38,263 (trainer:732) INFO: 19epoch:train:901-1000batch: iter_time=1.127e-04, forward_time=0.108, loss_ctc=74.503, loss_att=54.158, acc=0.674, loss=60.262, backward_time=0.751, grad_norm=102.346, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.551e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 18:31:18,016 (trainer:732) INFO: 19epoch:train:1001-1100batch: iter_time=1.100e-04, forward_time=0.108, loss_ctc=74.767, loss_att=56.937, acc=0.684, loss=62.286, backward_time=0.752, grad_norm=82.548, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.548e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 18:32:57,536 (trainer:732) INFO: 19epoch:train:1101-1200batch: iter_time=1.159e-04, forward_time=0.107, loss_ctc=70.675, loss_att=55.035, acc=0.670, loss=59.727, backward_time=0.752, grad_norm=94.405, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.546e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 18:34:37,089 (trainer:732) INFO: 19epoch:train:1201-1300batch: iter_time=1.106e-04, forward_time=0.107, loss_ctc=76.324, loss_att=59.642, acc=0.686, loss=64.647, backward_time=0.751, grad_norm=93.883, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.544e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 18:36:17,028 (trainer:732) INFO: 19epoch:train:1301-1400batch: iter_time=9.751e-05, forward_time=0.109, loss_ctc=67.885, loss_att=54.884, acc=0.685, loss=58.784, backward_time=0.753, grad_norm=91.475, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.541e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 18:37:56,460 (trainer:732) INFO: 19epoch:train:1401-1500batch: iter_time=1.118e-04, forward_time=0.107, loss_ctc=66.717, loss_att=53.410, acc=0.694, loss=57.402, backward_time=0.750, grad_norm=80.970, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.539e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 18:39:36,031 (trainer:732) INFO: 19epoch:train:1501-1600batch: iter_time=8.864e-05, forward_time=0.108, loss_ctc=81.647, loss_att=67.344, acc=0.685, loss=71.635, 
backward_time=0.750, grad_norm=114.722, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.536e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 18:40:44,261 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-06 18:41:03,688 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 18:41:07,475 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 18:41:07,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 18:41:07,482 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 18:45:41,292 (trainer:732) INFO: 19epoch:train:1601-1700batch: iter_time=1.309, forward_time=0.108, loss_ctc=77.277, loss_att=60.983, acc=0.668, loss=65.871, backward_time=0.768, grad_norm=93.748, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.534e-05, train_time=7.305 +[gpua003:0/64] 2023-07-06 18:47:21,491 (trainer:732) INFO: 19epoch:train:1701-1800batch: iter_time=1.143e-04, forward_time=0.109, loss_ctc=73.968, loss_att=51.681, acc=0.683, loss=58.367, backward_time=0.753, grad_norm=97.133, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.531e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 18:49:01,195 (trainer:732) INFO: 19epoch:train:1801-1900batch: iter_time=1.050e-04, forward_time=0.108, loss_ctc=73.852, loss_att=56.822, acc=0.683, loss=61.931, backward_time=0.751, grad_norm=95.195, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.529e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 18:50:40,963 (trainer:732) INFO: 19epoch:train:1901-2000batch: iter_time=1.119e-04, forward_time=0.109, loss_ctc=68.398, loss_att=50.868, acc=0.690, loss=56.127, backward_time=0.752, grad_norm=82.918, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.526e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 18:52:20,494 (trainer:732) INFO: 19epoch:train:2001-2100batch: iter_time=1.182e-04, forward_time=0.108, loss_ctc=72.453, loss_att=55.434, acc=0.682, loss=60.540, backward_time=0.750, grad_norm=91.971, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.524e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 18:54:00,169 (trainer:732) INFO: 19epoch:train:2101-2200batch: iter_time=1.126e-04, forward_time=0.109, loss_ctc=70.471, loss_att=60.656, acc=0.681, loss=63.600, backward_time=0.752, grad_norm=92.734, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.521e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 18:55:43,499 (trainer:732) INFO: 19epoch:train:2201-2300batch: iter_time=1.184e-04, forward_time=0.109, loss_ctc=70.887, loss_att=55.798, acc=0.691, loss=60.325, backward_time=0.753, grad_norm=87.370, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.519e-05, train_time=2.066 +[gpua003:0/64] 2023-07-06 18:57:28,468 (trainer:732) INFO: 
19epoch:train:2301-2400batch: iter_time=1.142e-04, forward_time=0.108, loss_ctc=74.302, loss_att=62.084, acc=0.687, loss=65.750, backward_time=0.757, grad_norm=90.024, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.516e-05, train_time=2.099 +[gpua003:0/64] 2023-07-06 18:59:09,045 (trainer:732) INFO: 19epoch:train:2401-2500batch: iter_time=9.995e-05, forward_time=0.109, loss_ctc=83.902, loss_att=62.709, acc=0.676, loss=69.067, backward_time=0.757, grad_norm=139.737, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.514e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 18:59:11,328 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 18:59:30,666 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 18:59:34,430 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 18:59:34,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 18:59:34,436 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 19:05:02,322 (trainer:732) INFO: 19epoch:train:2501-2600batch: iter_time=1.326, forward_time=0.108, loss_ctc=70.364, loss_att=51.755, acc=0.683, loss=57.338, backward_time=0.793, grad_norm=99.243, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.511e-05, train_time=7.065 +[gpua003:0/64] 2023-07-06 19:06:42,318 (trainer:732) INFO: 19epoch:train:2601-2700batch: iter_time=1.063e-04, forward_time=0.108, loss_ctc=73.763, loss_att=53.729, acc=0.683, loss=59.739, backward_time=0.752, grad_norm=99.427, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.509e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 19:08:21,986 (trainer:732) INFO: 19epoch:train:2701-2800batch: iter_time=9.496e-05, forward_time=0.107, loss_ctc=69.928, loss_att=54.179, acc=0.687, loss=58.904, backward_time=0.752, grad_norm=87.933, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.506e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 19:10:01,778 (trainer:732) INFO: 19epoch:train:2801-2900batch: iter_time=9.847e-05, forward_time=0.108, loss_ctc=70.824, loss_att=55.649, acc=0.683, loss=60.201, backward_time=0.753, grad_norm=104.518, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.504e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 19:11:43,478 (trainer:732) INFO: 19epoch:train:2901-3000batch: iter_time=8.790e-05, forward_time=0.108, loss_ctc=71.744, loss_att=56.572, acc=0.683, loss=61.124, backward_time=0.753, grad_norm=97.646, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.501e-05, train_time=2.034 +[gpua003:0/64] 2023-07-06 19:13:23,321 (trainer:732) INFO: 19epoch:train:3001-3100batch: iter_time=8.518e-05, forward_time=0.108, loss_ctc=68.969, loss_att=56.135, acc=0.697, loss=59.985, backward_time=0.752, grad_norm=93.645, clip=100.000, 
loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.499e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:15:02,845 (trainer:732) INFO: 19epoch:train:3101-3200batch: iter_time=9.418e-05, forward_time=0.107, loss_ctc=66.684, loss_att=49.410, acc=0.712, loss=54.592, backward_time=0.751, grad_norm=86.075, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.496e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 19:16:42,465 (trainer:732) INFO: 19epoch:train:3201-3300batch: iter_time=1.012e-04, forward_time=0.108, loss_ctc=83.199, loss_att=70.634, acc=0.670, loss=74.404, backward_time=0.751, grad_norm=111.083, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.494e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:17:18,787 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 19:17:38,120 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:17:41,665 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:17:41,665 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 19:17:41,672 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 19:23:47,034 (trainer:732) INFO: 19epoch:train:3301-3400batch: iter_time=3.115, forward_time=0.165, loss_ctc=73.481, loss_att=53.362, acc=0.685, loss=59.397, backward_time=0.768, grad_norm=89.020, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.115, optim0_lr0=8.492e-05, train_time=8.491 +[gpua003:0/64] 2023-07-06 19:25:27,393 (trainer:732) INFO: 19epoch:train:3401-3500batch: iter_time=1.015e-04, forward_time=0.109, loss_ctc=74.841, loss_att=53.947, acc=0.678, loss=60.215, backward_time=0.753, grad_norm=92.488, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.489e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 19:27:17,900 (trainer:732) INFO: 19epoch:train:3501-3600batch: iter_time=8.651e-05, forward_time=0.108, loss_ctc=73.019, loss_att=56.463, acc=0.684, loss=61.429, backward_time=0.762, grad_norm=97.312, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.487e-05, train_time=2.210 +[gpua003:0/64] 2023-07-06 19:28:57,771 (trainer:732) INFO: 19epoch:train:3601-3700batch: iter_time=9.262e-05, forward_time=0.108, loss_ctc=70.917, loss_att=54.082, acc=0.678, loss=59.133, backward_time=0.753, grad_norm=99.327, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.484e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:30:37,491 (trainer:732) INFO: 19epoch:train:3701-3800batch: iter_time=1.079e-04, forward_time=0.107, loss_ctc=73.384, loss_att=57.937, acc=0.690, loss=62.571, backward_time=0.750, grad_norm=93.510, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.482e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:32:17,208 (trainer:732) INFO: 19epoch:train:3801-3900batch: iter_time=1.142e-04, 
forward_time=0.108, loss_ctc=67.881, loss_att=55.385, acc=0.682, loss=59.133, backward_time=0.751, grad_norm=104.375, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.479e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:33:56,953 (trainer:732) INFO: 19epoch:train:3901-4000batch: iter_time=1.101e-04, forward_time=0.108, loss_ctc=66.750, loss_att=52.965, acc=0.698, loss=57.101, backward_time=0.752, grad_norm=101.994, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.477e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 19:35:36,807 (trainer:732) INFO: 19epoch:train:4001-4100batch: iter_time=9.853e-05, forward_time=0.108, loss_ctc=78.586, loss_att=65.422, acc=0.693, loss=69.371, backward_time=0.753, grad_norm=90.671, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.475e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:36:46,521 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 19:37:05,501 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:37:09,017 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:37:09,017 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 19:37:09,023 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 19:42:01,856 (trainer:732) INFO: 19epoch:train:4101-4200batch: iter_time=1.271, forward_time=0.108, loss_ctc=77.470, loss_att=59.834, acc=0.674, loss=65.125, backward_time=0.764, grad_norm=104.225, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.472e-05, train_time=7.701 +[gpua003:0/64] 2023-07-06 19:43:42,839 (trainer:732) INFO: 19epoch:train:4201-4300batch: iter_time=9.875e-05, forward_time=0.108, loss_ctc=72.007, loss_att=52.479, acc=0.697, loss=58.337, backward_time=0.754, grad_norm=104.085, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.470e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 19:45:25,413 (trainer:732) INFO: 19epoch:train:4301-4400batch: iter_time=8.662e-05, forward_time=0.108, loss_ctc=75.337, loss_att=58.219, acc=0.688, loss=63.354, backward_time=0.759, grad_norm=99.286, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.467e-05, train_time=2.051 +[gpua003:0/64] 2023-07-06 19:47:05,267 (trainer:732) INFO: 19epoch:train:4401-4500batch: iter_time=1.128e-04, forward_time=0.108, loss_ctc=68.687, loss_att=50.921, acc=0.699, loss=56.251, backward_time=0.752, grad_norm=83.793, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.465e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:48:45,031 (trainer:732) INFO: 19epoch:train:4501-4600batch: iter_time=1.138e-04, forward_time=0.109, loss_ctc=66.702, loss_att=53.486, acc=0.690, loss=57.451, backward_time=0.751, grad_norm=87.055, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.462e-05, 
train_time=1.995 +[gpua003:0/64] 2023-07-06 19:50:28,601 (trainer:732) INFO: 19epoch:train:4601-4700batch: iter_time=1.079e-04, forward_time=0.108, loss_ctc=72.286, loss_att=61.308, acc=0.686, loss=64.601, backward_time=0.756, grad_norm=99.764, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.460e-05, train_time=2.071 +[gpua003:0/64] 2023-07-06 19:52:08,198 (trainer:732) INFO: 19epoch:train:4701-4800batch: iter_time=1.036e-04, forward_time=0.108, loss_ctc=71.867, loss_att=54.172, acc=0.705, loss=59.481, backward_time=0.750, grad_norm=109.482, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.458e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:53:47,911 (trainer:732) INFO: 19epoch:train:4801-4900batch: iter_time=1.030e-04, forward_time=0.108, loss_ctc=73.498, loss_att=60.646, acc=0.710, loss=64.501, backward_time=0.751, grad_norm=132.193, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.455e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:55:27,532 (trainer:732) INFO: 19epoch:train:4901-5000batch: iter_time=1.088e-04, forward_time=0.108, loss_ctc=81.536, loss_att=64.509, acc=0.679, loss=69.617, backward_time=0.751, grad_norm=111.664, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.453e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:55:30,048 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 19:55:48,920 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:55:52,427 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:55:52,427 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 19:55:52,433 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:00:40,776 (trainer:732) INFO: 19epoch:train:5001-5100batch: iter_time=1.366, forward_time=0.108, loss_ctc=69.849, loss_att=51.729, acc=0.695, loss=57.165, backward_time=0.763, grad_norm=87.510, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.450e-05, train_time=6.265 +[gpua003:0/64] 2023-07-06 20:02:20,937 (trainer:732) INFO: 19epoch:train:5101-5200batch: iter_time=9.291e-05, forward_time=0.108, loss_ctc=70.810, loss_att=51.826, acc=0.701, loss=57.521, backward_time=0.752, grad_norm=89.983, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.448e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 20:04:03,241 (trainer:732) INFO: 19epoch:train:5201-5300batch: iter_time=9.771e-05, forward_time=0.109, loss_ctc=70.882, loss_att=52.510, acc=0.700, loss=58.021, backward_time=0.755, grad_norm=78.370, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.445e-05, train_time=2.046 +[gpua003:0/64] 2023-07-06 20:05:51,446 (trainer:732) INFO: 19epoch:train:5301-5400batch: iter_time=9.570e-05, forward_time=0.107, loss_ctc=69.962, loss_att=57.223, acc=0.687, loss=61.045, 
backward_time=0.760, grad_norm=91.273, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.443e-05, train_time=2.164 +[gpua003:0/64] 2023-07-06 20:07:31,682 (trainer:732) INFO: 19epoch:train:5401-5500batch: iter_time=9.179e-05, forward_time=0.107, loss_ctc=70.911, loss_att=55.955, acc=0.689, loss=60.442, backward_time=0.751, grad_norm=91.242, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.441e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 20:09:13,295 (trainer:732) INFO: 19epoch:train:5501-5600batch: iter_time=9.333e-05, forward_time=0.107, loss_ctc=72.418, loss_att=59.104, acc=0.701, loss=63.098, backward_time=0.754, grad_norm=97.947, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.438e-05, train_time=2.032 +[gpua003:0/64] 2023-07-06 20:10:53,158 (trainer:732) INFO: 19epoch:train:5601-5700batch: iter_time=9.882e-05, forward_time=0.107, loss_ctc=66.916, loss_att=50.106, acc=0.718, loss=55.149, backward_time=0.751, grad_norm=81.033, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.436e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 20:12:34,924 (trainer:732) INFO: 19epoch:train:5701-5800batch: iter_time=9.385e-05, forward_time=0.108, loss_ctc=82.933, loss_att=68.684, acc=0.688, loss=72.959, backward_time=0.756, grad_norm=95.394, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.433e-05, train_time=2.035 +[gpua003:0/64] 2023-07-06 20:13:11,362 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 20:13:30,240 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:13:33,729 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:13:33,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 20:13:33,736 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:17:25,315 (trainer:732) INFO: 19epoch:train:5801-5900batch: iter_time=1.332, forward_time=0.109, loss_ctc=72.517, loss_att=53.535, acc=0.694, loss=59.230, backward_time=0.764, grad_norm=91.357, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.431e-05, train_time=5.808 +[gpua003:0/64] 2023-07-06 20:19:05,549 (trainer:732) INFO: 19epoch:train:5901-6000batch: iter_time=9.979e-05, forward_time=0.108, loss_ctc=72.968, loss_att=53.312, acc=0.689, loss=59.209, backward_time=0.753, grad_norm=105.818, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.429e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 20:20:45,322 (trainer:732) INFO: 19epoch:train:6001-6100batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=72.912, loss_att=54.448, acc=0.697, loss=59.987, backward_time=0.751, grad_norm=86.049, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.426e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 20:22:41,206 (trainer:732) INFO: 
19epoch:train:6101-6200batch: iter_time=9.789e-05, forward_time=0.115, loss_ctc=67.667, loss_att=52.369, acc=0.694, loss=56.958, backward_time=0.773, grad_norm=79.432, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.424e-05, train_time=2.317 +[gpua003:0/64] 2023-07-06 20:24:21,811 (trainer:732) INFO: 19epoch:train:6201-6300batch: iter_time=1.002e-04, forward_time=0.109, loss_ctc=72.569, loss_att=58.922, acc=0.692, loss=63.016, backward_time=0.756, grad_norm=114.121, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.421e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 20:26:02,019 (trainer:732) INFO: 19epoch:train:6301-6400batch: iter_time=1.042e-04, forward_time=0.110, loss_ctc=67.317, loss_att=54.258, acc=0.696, loss=58.176, backward_time=0.753, grad_norm=86.928, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.419e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 20:27:41,637 (trainer:732) INFO: 19epoch:train:6401-6500batch: iter_time=1.161e-04, forward_time=0.108, loss_ctc=66.587, loss_att=54.003, acc=0.706, loss=57.778, backward_time=0.750, grad_norm=93.696, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.417e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 20:29:21,369 (trainer:732) INFO: 19epoch:train:6501-6600batch: iter_time=1.061e-04, forward_time=0.109, loss_ctc=78.539, loss_att=62.535, acc=0.711, loss=67.336, backward_time=0.751, grad_norm=87.737, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.414e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 20:30:33,131 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 20:30:52,251 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:30:55,813 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:30:55,813 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 20:30:55,820 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:35:42,565 (trainer:732) INFO: 19epoch:train:6601-6700batch: iter_time=1.437, forward_time=0.110, loss_ctc=76.173, loss_att=59.992, acc=0.683, loss=64.847, backward_time=0.771, grad_norm=92.415, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.412e-05, train_time=7.624 +[gpua003:0/64] 2023-07-06 20:37:35,033 (trainer:732) INFO: 19epoch:train:6701-6800batch: iter_time=2.117e-04, forward_time=0.111, loss_ctc=69.258, loss_att=50.177, acc=0.703, loss=55.902, backward_time=0.765, grad_norm=87.979, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.410e-05, train_time=2.249 +[gpua003:0/64] 2023-07-06 20:39:33,155 (trainer:732) INFO: 19epoch:train:6801-6900batch: iter_time=8.933e-05, forward_time=0.110, loss_ctc=76.772, loss_att=57.481, acc=0.692, loss=63.268, backward_time=0.809, grad_norm=113.312, clip=100.000, loss_scale=1.153e+18, 
optim_step_time=0.113, optim0_lr0=8.407e-05, train_time=2.362 +[gpua003:0/64] 2023-07-06 20:41:18,407 (trainer:732) INFO: 19epoch:train:6901-7000batch: iter_time=8.764e-05, forward_time=0.107, loss_ctc=69.838, loss_att=51.828, acc=0.699, loss=57.231, backward_time=0.770, grad_norm=80.826, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.405e-05, train_time=2.105 +[gpua003:0/64] 2023-07-06 20:43:01,811 (trainer:732) INFO: 19epoch:train:7001-7100batch: iter_time=8.895e-05, forward_time=0.108, loss_ctc=65.767, loss_att=52.168, acc=0.697, loss=56.247, backward_time=0.757, grad_norm=105.272, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.402e-05, train_time=2.068 +[gpua003:0/64] 2023-07-06 20:44:41,509 (trainer:732) INFO: 19epoch:train:7101-7200batch: iter_time=9.980e-05, forward_time=0.108, loss_ctc=71.601, loss_att=60.582, acc=0.689, loss=63.888, backward_time=0.751, grad_norm=87.586, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.400e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 20:46:21,090 (trainer:732) INFO: 19epoch:train:7201-7300batch: iter_time=1.134e-04, forward_time=0.108, loss_ctc=70.650, loss_att=53.644, acc=0.707, loss=58.745, backward_time=0.750, grad_norm=87.476, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.398e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 20:48:02,044 (trainer:732) INFO: 19epoch:train:7301-7400batch: iter_time=8.952e-05, forward_time=0.108, loss_ctc=72.244, loss_att=60.016, acc=0.710, loss=63.685, backward_time=0.752, grad_norm=87.451, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.395e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 20:49:41,828 (trainer:732) INFO: 19epoch:train:7401-7500batch: iter_time=8.351e-05, forward_time=0.108, loss_ctc=78.571, loss_att=61.691, acc=0.689, loss=66.755, backward_time=0.751, grad_norm=104.440, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.393e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 20:49:52,973 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
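The loss_scale column in these records doubles periodically (1.441e+17 -> 2.882e+17 -> 5.765e+17 -> 1.153e+18 over epochs 18-19) and never shrinks in this stretch, which is the signature of dynamic loss scaling in mixed-precision training: the scaler doubles the scale after a fixed run of overflow-free optimizer steps and backs off when gradients overflow. The cadence here, one doubling per ~4,000 logged batches, would line up with PyTorch GradScaler defaults (growth_factor=2.0, growth_interval=2000) if two batches were accumulated per optimizer step; the accumulation setting is not shown in this log, so that is an inference. A generic sketch of the mechanism (not ESPnet's trainer code):

```python
import torch

# Sketch of the dynamic loss scaling visible in the loss_scale column.
# GradScaler doubles the scale after `growth_interval` consecutive
# non-overflowing steps and multiplies it by `backoff_factor` on overflow.
scaler = torch.cuda.amp.GradScaler(
    init_scale=2.0**16,    # this run's actual initial scale is not logged
    growth_factor=2.0,     # matches the observed doubling
    backoff_factor=0.5,
    growth_interval=2000,  # default; consistent with the observed cadence
)

def train_step(model, optimizer, batch, targets, loss_fn):
    optimizer.zero_grad(set_to_none=True)
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        loss = loss_fn(model(batch), targets)
    scaler.scale(loss).backward()  # backprop on loss * scale
    scaler.step(optimizer)         # unscales grads, skips step on inf/nan
    scaler.update()                # grows or backs off the scale
```

Relatedly, the "Building Nth iter-factory" messages land near batch 833*N within each 10,000-batch epoch: with --multiple_iterator true the epoch is spread over the 12 splits12 shards, ~10000/12 = 833 batches per shard, which matches the spacing of the builds above (after batches 800, 1600, 2500, 3300, ... in epoch 19).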
+[gpua003:0/64] 2023-07-06 20:50:12,449 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:50:15,993 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:50:15,993 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 20:50:15,999 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:54:12,385 (trainer:732) INFO: 19epoch:train:7501-7600batch: iter_time=1.571, forward_time=0.130, loss_ctc=72.069, loss_att=54.503, acc=0.682, loss=59.773, backward_time=0.763, grad_norm=94.845, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.391e-05, train_time=5.411 +[gpua003:0/64] 2023-07-06 20:55:52,369 (trainer:732) INFO: 19epoch:train:7601-7700batch: iter_time=9.053e-05, forward_time=0.107, loss_ctc=69.256, loss_att=49.020, acc=0.700, loss=55.091, backward_time=0.751, grad_norm=86.882, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.388e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 20:57:32,499 (trainer:732) INFO: 19epoch:train:7701-7800batch: iter_time=8.971e-05, forward_time=0.107, loss_ctc=71.084, loss_att=55.278, acc=0.687, loss=60.020, backward_time=0.750, grad_norm=85.779, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.386e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 20:59:12,329 (trainer:732) INFO: 19epoch:train:7801-7900batch: iter_time=1.121e-04, forward_time=0.109, loss_ctc=69.918, loss_att=55.616, acc=0.691, loss=59.906, backward_time=0.752, grad_norm=90.907, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.383e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 21:00:52,131 (trainer:732) INFO: 19epoch:train:7901-8000batch: iter_time=1.091e-04, forward_time=0.110, loss_ctc=72.031, loss_att=58.828, acc=0.687, loss=62.788, backward_time=0.752, grad_norm=90.990, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.381e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 21:02:31,776 (trainer:732) INFO: 19epoch:train:8001-8100batch: iter_time=1.210e-04, forward_time=0.109, loss_ctc=69.895, loss_att=59.175, acc=0.679, loss=62.391, backward_time=0.751, grad_norm=96.311, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.379e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:04:11,333 (trainer:732) INFO: 19epoch:train:8101-8200batch: iter_time=1.196e-04, forward_time=0.109, loss_ctc=65.075, loss_att=46.957, acc=0.713, loss=52.393, backward_time=0.752, grad_norm=79.989, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.376e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 21:05:50,952 (trainer:732) INFO: 19epoch:train:8201-8300batch: iter_time=1.100e-04, forward_time=0.109, loss_ctc=83.512, loss_att=69.549, acc=0.682, loss=73.738, backward_time=0.752, grad_norm=92.984, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.374e-05, 
train_time=1.992 +[gpua003:0/64] 2023-07-06 21:06:26,463 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 21:06:45,859 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:06:49,639 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:06:49,639 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 21:06:49,645 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 21:10:21,900 (trainer:732) INFO: 19epoch:train:8301-8400batch: iter_time=1.289, forward_time=0.109, loss_ctc=73.988, loss_att=57.572, acc=0.681, loss=62.497, backward_time=0.774, grad_norm=92.924, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.372e-05, train_time=5.419 +[gpua003:0/64] 2023-07-06 21:12:03,025 (trainer:732) INFO: 19epoch:train:8401-8500batch: iter_time=9.593e-05, forward_time=0.108, loss_ctc=71.684, loss_att=50.782, acc=0.691, loss=57.053, backward_time=0.753, grad_norm=91.974, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.369e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 21:13:42,791 (trainer:732) INFO: 19epoch:train:8501-8600batch: iter_time=8.646e-05, forward_time=0.109, loss_ctc=74.130, loss_att=56.299, acc=0.692, loss=61.648, backward_time=0.753, grad_norm=87.929, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.367e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 21:15:23,522 (trainer:732) INFO: 19epoch:train:8601-8700batch: iter_time=9.629e-05, forward_time=0.109, loss_ctc=68.342, loss_att=52.472, acc=0.685, loss=57.233, backward_time=0.753, grad_norm=89.721, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.365e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 21:17:03,161 (trainer:732) INFO: 19epoch:train:8701-8800batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=72.982, loss_att=57.376, acc=0.697, loss=62.058, backward_time=0.751, grad_norm=113.686, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.362e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:18:43,104 (trainer:732) INFO: 19epoch:train:8801-8900batch: iter_time=9.576e-05, forward_time=0.109, loss_ctc=66.077, loss_att=53.057, acc=0.696, loss=56.963, backward_time=0.753, grad_norm=81.859, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.360e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 21:20:22,718 (trainer:732) INFO: 19epoch:train:8901-9000batch: iter_time=1.008e-04, forward_time=0.108, loss_ctc=65.904, loss_att=53.143, acc=0.700, loss=56.972, backward_time=0.751, grad_norm=84.308, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.358e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 21:22:02,446 (trainer:732) INFO: 19epoch:train:9001-9100batch: iter_time=9.656e-05, forward_time=0.108, loss_ctc=77.892, loss_att=65.356, acc=0.694, loss=69.117, 
backward_time=0.752, grad_norm=101.891, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.355e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 21:23:10,807 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 21:23:29,796 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:23:33,339 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:23:33,339 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 21:23:33,345 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 21:27:24,475 (trainer:732) INFO: 19epoch:train:9101-9200batch: iter_time=1.327, forward_time=0.109, loss_ctc=75.034, loss_att=57.612, acc=0.680, loss=62.839, backward_time=0.762, grad_norm=88.253, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.353e-05, train_time=6.440 +[gpua003:0/64] 2023-07-06 21:29:05,161 (trainer:732) INFO: 19epoch:train:9201-9300batch: iter_time=9.736e-05, forward_time=0.108, loss_ctc=71.238, loss_att=51.142, acc=0.705, loss=57.170, backward_time=0.754, grad_norm=81.302, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.351e-05, train_time=2.013 +[gpua003:0/64] 2023-07-06 21:30:46,142 (trainer:732) INFO: 19epoch:train:9301-9400batch: iter_time=1.061e-04, forward_time=0.109, loss_ctc=74.369, loss_att=57.598, acc=0.690, loss=62.630, backward_time=0.752, grad_norm=101.798, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.348e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 21:32:26,233 (trainer:732) INFO: 19epoch:train:9401-9500batch: iter_time=9.370e-05, forward_time=0.109, loss_ctc=67.835, loss_att=50.281, acc=0.706, loss=55.547, backward_time=0.752, grad_norm=88.722, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.346e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 21:34:06,640 (trainer:732) INFO: 19epoch:train:9501-9600batch: iter_time=1.055e-04, forward_time=0.110, loss_ctc=64.836, loss_att=52.472, acc=0.696, loss=56.181, backward_time=0.753, grad_norm=90.882, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.344e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 21:36:03,018 (trainer:732) INFO: 19epoch:train:9601-9700batch: iter_time=7.040e-04, forward_time=0.150, loss_ctc=70.591, loss_att=59.315, acc=0.695, loss=62.698, backward_time=0.779, grad_norm=92.334, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.341e-05, train_time=2.327 +[gpua003:0/64] 2023-07-06 21:37:44,980 (trainer:732) INFO: 19epoch:train:9701-9800batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=69.523, loss_att=55.550, acc=0.707, loss=59.741, backward_time=0.753, grad_norm=96.147, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.339e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 21:39:24,859 (trainer:732) INFO: 
19epoch:train:9801-9900batch: iter_time=1.053e-04, forward_time=0.110, loss_ctc=73.187, loss_att=59.236, acc=0.717, loss=63.421, backward_time=0.753, grad_norm=85.059, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.337e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 21:41:04,508 (trainer:732) INFO: 19epoch:train:9901-10000batch: iter_time=9.273e-05, forward_time=0.108, loss_ctc=79.916, loss_att=59.698, acc=0.692, loss=65.764, backward_time=0.751, grad_norm=87.679, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.334e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 21:53:26,999 (trainer:338) INFO: 19epoch results: [train] iter_time=0.201, forward_time=0.110, loss_ctc=72.442, loss_att=56.449, acc=0.691, loss=61.247, backward_time=0.757, grad_norm=94.422, clip=100.000, loss_scale=7.494e+17, optim_step_time=0.113, optim0_lr0=8.452e-05, train_time=2.614, time=3 hours, 38 minutes and 13.18 seconds, total_count=160000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.578, cer_ctc=0.291, loss_att=43.395, acc=0.637, cer=0.442, wer=1.000, loss=45.550, time=5 minutes and 44.01 seconds, total_count=16698, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 18.47 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-06 21:53:45,891 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-06 21:53:45,932 (trainer:272) INFO: 20/100epoch started. Estimated time to finish: 1 week, 6 days and 1 hour
+[gpua003:0/64] 2023-07-06 21:53:46,828 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 21:54:05,845 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 21:54:10,947 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 21:54:10,947 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 21:54:11,034 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 22:01:50,792 (trainer:732) INFO: 20epoch:train:1-100batch: iter_time=3.782, forward_time=0.136, loss_ctc=71.854, loss_att=51.948, acc=0.710, loss=57.919, backward_time=0.767, grad_norm=91.560, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.332e-05, train_time=9.690
+[gpua003:0/64] 2023-07-06 22:03:30,984 (trainer:732) INFO: 20epoch:train:101-200batch: iter_time=9.746e-05, forward_time=0.108, loss_ctc=68.269, loss_att=50.073, acc=0.698, loss=55.532, backward_time=0.752, grad_norm=89.704, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.330e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 22:05:13,770 (trainer:732) INFO: 20epoch:train:201-300batch: iter_time=1.101e-04, forward_time=0.109, loss_ctc=76.182, loss_att=54.086, acc=0.680, loss=60.715, backward_time=0.754, grad_norm=92.398, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.327e-05, train_time=2.056
+[gpua003:0/64] 2023-07-06 22:06:54,574 (trainer:732) INFO: 20epoch:train:301-400batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=72.772, loss_att=56.293, acc=0.685, loss=61.237, backward_time=0.751, grad_norm=105.066, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.325e-05, train_time=2.016
+[gpua003:0/64] 2023-07-06 22:08:34,689 (trainer:732) INFO: 20epoch:train:401-500batch: iter_time=1.021e-04, forward_time=0.106, loss_ctc=75.522, loss_att=55.332, acc=0.684, loss=61.389, backward_time=0.749, grad_norm=97.945, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.323e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 22:10:14,287 (trainer:732) INFO: 20epoch:train:501-600batch: iter_time=1.003e-04, forward_time=0.107, loss_ctc=73.512, loss_att=56.609, acc=0.702, loss=61.680, backward_time=0.750, grad_norm=84.685, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.321e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 22:11:54,452 (trainer:732) INFO: 20epoch:train:601-700batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=78.698, loss_att=54.562, acc=0.686, loss=61.803, backward_time=0.750, grad_norm=98.035, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.318e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 22:13:40,823 (trainer:732) INFO: 20epoch:train:701-800batch: iter_time=1.096e-04, forward_time=0.108, loss_ctc=89.847, loss_att=64.589, acc=0.689, loss=72.167, backward_time=0.762, grad_norm=100.675, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.316e-05, train_time=2.127
+[gpua003:0/64] 2023-07-06 22:14:20,761 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 22:14:39,405 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 22:14:43,119 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 22:14:43,119 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-06 22:14:43,125 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 22:18:32,284 (trainer:732) INFO: 20epoch:train:801-900batch: iter_time=1.362, forward_time=0.108, loss_ctc=71.198, loss_att=53.976, acc=0.705, loss=59.142, backward_time=0.768, grad_norm=80.563, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.314e-05, train_time=5.829
+[gpua003:0/64] 2023-07-06 22:20:13,201 (trainer:732) INFO: 20epoch:train:901-1000batch: iter_time=9.604e-05, forward_time=0.108, loss_ctc=67.658, loss_att=48.534, acc=0.705, loss=54.271, backward_time=0.755, grad_norm=86.115, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.311e-05, train_time=2.018
+[gpua003:0/64] 2023-07-06 22:21:53,069 (trainer:732) INFO: 20epoch:train:1001-1100batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=75.100, loss_att=54.061, acc=0.692, loss=60.372, backward_time=0.753, grad_norm=94.035, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.309e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 22:23:34,205 (trainer:732) INFO: 20epoch:train:1101-1200batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=75.994, loss_att=54.925, acc=0.686, loss=61.246, backward_time=0.754, grad_norm=94.166, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.307e-05, train_time=2.022
+[gpua003:0/64] 2023-07-06 22:25:14,385 (trainer:732) INFO: 20epoch:train:1201-1300batch: iter_time=9.688e-05, forward_time=0.108, loss_ctc=68.867, loss_att=52.161, acc=0.694, loss=57.173, backward_time=0.752, grad_norm=88.133, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.304e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 22:26:54,236 (trainer:732) INFO: 20epoch:train:1301-1400batch: iter_time=9.936e-05, forward_time=0.108, loss_ctc=74.280, loss_att=59.054, acc=0.691, loss=63.622, backward_time=0.753, grad_norm=81.828, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.302e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 22:28:34,189 (trainer:732) INFO: 20epoch:train:1401-1500batch: iter_time=9.366e-05, forward_time=0.108, loss_ctc=72.329, loss_att=51.203, acc=0.701, loss=57.541, backward_time=0.752, grad_norm=88.414, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.300e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 22:30:15,525 (trainer:732) INFO: 20epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=91.703, loss_att=61.724, acc=0.683, loss=70.718, backward_time=0.754, grad_norm=106.713, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.298e-05, train_time=2.026
+[gpua003:0/64] 2023-07-06 22:31:22,752 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 22:31:42,368 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 22:31:46,184 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 22:31:46,184 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 22:31:46,190 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 22:35:07,262 (trainer:732) INFO: 20epoch:train:1601-1700batch: iter_time=1.291, forward_time=0.108, loss_ctc=71.960, loss_att=56.558, acc=0.711, loss=61.178, backward_time=0.764, grad_norm=84.868, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.295e-05, train_time=5.835
+[gpua003:0/64] 2023-07-06 22:36:48,148 (trainer:732) INFO: 20epoch:train:1701-1800batch: iter_time=1.106e-04, forward_time=0.109, loss_ctc=70.844, loss_att=53.568, acc=0.697, loss=58.751, backward_time=0.755, grad_norm=109.667, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.293e-05, train_time=2.017
+[gpua003:0/64] 2023-07-06 22:38:27,849 (trainer:732) INFO: 20epoch:train:1801-1900batch: iter_time=9.001e-05, forward_time=0.108, loss_ctc=70.234, loss_att=49.729, acc=0.708, loss=55.881, backward_time=0.752, grad_norm=104.621, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.291e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 22:40:07,934 (trainer:732) INFO: 20epoch:train:1901-2000batch: iter_time=9.188e-05, forward_time=0.109, loss_ctc=74.679, loss_att=53.534, acc=0.678, loss=59.877, backward_time=0.753, grad_norm=99.994, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.288e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 22:41:47,719 (trainer:732) INFO: 20epoch:train:2001-2100batch: iter_time=9.469e-05, forward_time=0.109, loss_ctc=72.452, loss_att=57.103, acc=0.685, loss=61.708, backward_time=0.753, grad_norm=88.374, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.286e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 22:43:27,515 (trainer:732) INFO: 20epoch:train:2101-2200batch: iter_time=9.559e-05, forward_time=0.108, loss_ctc=72.276, loss_att=54.565, acc=0.690, loss=59.878, backward_time=0.753, grad_norm=85.377, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.284e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 22:45:07,265 (trainer:732) INFO: 20epoch:train:2201-2300batch: iter_time=1.133e-04, forward_time=0.109, loss_ctc=70.952, loss_att=53.601, acc=0.697, loss=58.807, backward_time=0.752, grad_norm=91.829, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.282e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 22:46:46,964 (trainer:732) INFO: 20epoch:train:2301-2400batch: iter_time=1.050e-04, forward_time=0.109, loss_ctc=82.613, loss_att=58.490, acc=0.677, loss=65.727, backward_time=0.751, grad_norm=108.551, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.279e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 22:48:27,085 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 22:48:46,472 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 22:48:50,356 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 22:48:50,356 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 22:48:50,362 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 22:53:34,506 (trainer:732) INFO: 20epoch:train:2401-2500batch: iter_time=2.844, forward_time=0.130, loss_ctc=80.783, loss_att=60.279, acc=0.694, loss=66.430, backward_time=0.756, grad_norm=95.454, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.115, optim0_lr0=8.277e-05, train_time=8.151
+[gpua003:0/64] 2023-07-06 22:55:17,165 (trainer:732) INFO: 20epoch:train:2501-2600batch: iter_time=9.896e-05, forward_time=0.113, loss_ctc=70.675, loss_att=49.400, acc=0.713, loss=55.783, backward_time=0.761, grad_norm=88.563, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.275e-05, train_time=2.053
+[gpua003:0/64] 2023-07-06 22:56:57,735 (trainer:732) INFO: 20epoch:train:2601-2700batch: iter_time=9.556e-05, forward_time=0.108, loss_ctc=74.837, loss_att=54.096, acc=0.700, loss=60.318, backward_time=0.756, grad_norm=95.551, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.273e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 22:58:38,514 (trainer:732) INFO: 20epoch:train:2701-2800batch: iter_time=9.973e-05, forward_time=0.107, loss_ctc=75.135, loss_att=51.859, acc=0.679, loss=58.842, backward_time=0.752, grad_norm=88.078, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.270e-05, train_time=2.015
+[gpua003:0/64] 2023-07-06 23:00:20,949 (trainer:732) INFO: 20epoch:train:2801-2900batch: iter_time=9.541e-05, forward_time=0.108, loss_ctc=69.255, loss_att=55.518, acc=0.700, loss=59.639, backward_time=0.755, grad_norm=80.641, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.268e-05, train_time=2.048
+[gpua003:0/64] 2023-07-06 23:02:14,547 (trainer:732) INFO: 20epoch:train:2901-3000batch: iter_time=9.836e-05, forward_time=0.109, loss_ctc=74.405, loss_att=53.787, acc=0.692, loss=59.972, backward_time=0.782, grad_norm=90.228, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.266e-05, train_time=2.272
+[gpua003:0/64] 2023-07-06 23:03:54,862 (trainer:732) INFO: 20epoch:train:3001-3100batch: iter_time=9.682e-05, forward_time=0.109, loss_ctc=68.569, loss_att=51.720, acc=0.703, loss=56.774, backward_time=0.753, grad_norm=80.182, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.264e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 23:05:36,742 (trainer:732) INFO: 20epoch:train:3101-3200batch: iter_time=9.416e-05, forward_time=0.110, loss_ctc=81.585, loss_att=57.636, acc=0.695, loss=64.820, backward_time=0.756, grad_norm=114.022, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.261e-05, train_time=2.037
+[gpua003:0/64] 2023-07-06 23:07:16,980 (trainer:732) INFO: 20epoch:train:3201-3300batch: iter_time=9.267e-05, forward_time=0.109, loss_ctc=83.278, loss_att=62.301, acc=0.690, loss=68.594, backward_time=0.752, grad_norm=102.383, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.259e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 23:07:51,559 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 23:08:11,129 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 23:08:14,689 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 23:08:14,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 23:08:14,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 23:13:03,956 (trainer:732) INFO: 20epoch:train:3301-3400batch: iter_time=1.301, forward_time=0.109, loss_ctc=78.861, loss_att=56.676, acc=0.700, loss=63.331, backward_time=0.769, grad_norm=114.958, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.257e-05, train_time=6.939
+[gpua003:0/64] 2023-07-06 23:14:44,012 (trainer:732) INFO: 20epoch:train:3401-3500batch: iter_time=9.664e-05, forward_time=0.108, loss_ctc=68.025, loss_att=50.075, acc=0.699, loss=55.460, backward_time=0.753, grad_norm=84.489, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.255e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 23:16:27,830 (trainer:732) INFO: 20epoch:train:3501-3600batch: iter_time=9.904e-05, forward_time=0.108, loss_ctc=74.312, loss_att=52.930, acc=0.693, loss=59.345, backward_time=0.765, grad_norm=93.179, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.252e-05, train_time=2.076
+[gpua003:0/64] 2023-07-06 23:18:08,117 (trainer:732) INFO: 20epoch:train:3601-3700batch: iter_time=1.068e-04, forward_time=0.110, loss_ctc=73.489, loss_att=54.029, acc=0.682, loss=59.867, backward_time=0.753, grad_norm=96.418, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.250e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 23:19:47,962 (trainer:732) INFO: 20epoch:train:3701-3800batch: iter_time=9.700e-05, forward_time=0.107, loss_ctc=67.104, loss_att=51.126, acc=0.699, loss=55.919, backward_time=0.751, grad_norm=85.122, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.248e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 23:21:27,624 (trainer:732) INFO: 20epoch:train:3801-3900batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=73.938, loss_att=60.096, acc=0.686, loss=64.249, backward_time=0.751, grad_norm=89.373, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.246e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 23:23:07,236 (trainer:732) INFO: 20epoch:train:3901-4000batch: iter_time=1.016e-04, forward_time=0.108, loss_ctc=72.519, loss_att=51.367, acc=0.692, loss=57.713, backward_time=0.752, grad_norm=97.338, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.243e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 23:24:46,969 (trainer:732) INFO: 20epoch:train:4001-4100batch: iter_time=9.761e-05, forward_time=0.108, loss_ctc=88.906, loss_att=60.730, acc=0.683, loss=69.183, backward_time=0.751, grad_norm=111.352, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.241e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 23:25:53,953 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 23:26:13,125 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 23:26:16,688 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 23:26:16,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 23:26:16,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 23:29:48,281 (trainer:732) INFO: 20epoch:train:4101-4200batch: iter_time=1.285, forward_time=0.108, loss_ctc=71.787, loss_att=56.750, acc=0.703, loss=61.261, backward_time=0.763, grad_norm=89.955, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.239e-05, train_time=6.026
+[gpua003:0/64] 2023-07-06 23:31:29,587 (trainer:732) INFO: 20epoch:train:4201-4300batch: iter_time=1.041e-04, forward_time=0.109, loss_ctc=70.768, loss_att=51.030, acc=0.713, loss=56.951, backward_time=0.758, grad_norm=86.739, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.237e-05, train_time=2.026
+[gpua003:0/64] 2023-07-06 23:33:09,537 (trainer:732) INFO: 20epoch:train:4301-4400batch: iter_time=1.101e-04, forward_time=0.108, loss_ctc=69.008, loss_att=48.198, acc=0.717, loss=54.441, backward_time=0.752, grad_norm=98.169, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.234e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 23:34:49,543 (trainer:732) INFO: 20epoch:train:4401-4500batch: iter_time=1.108e-04, forward_time=0.108, loss_ctc=73.392, loss_att=51.724, acc=0.689, loss=58.225, backward_time=0.753, grad_norm=99.811, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.232e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 23:36:29,633 (trainer:732) INFO: 20epoch:train:4501-4600batch: iter_time=1.189e-04, forward_time=0.109, loss_ctc=71.304, loss_att=56.277, acc=0.699, loss=60.785, backward_time=0.753, grad_norm=84.933, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.230e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 23:38:09,605 (trainer:732) INFO: 20epoch:train:4601-4700batch: iter_time=1.208e-04, forward_time=0.108, loss_ctc=70.498, loss_att=54.270, acc=0.697, loss=59.139, backward_time=0.752, grad_norm=83.394, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.228e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 23:39:49,275 (trainer:732) INFO: 20epoch:train:4701-4800batch: iter_time=1.190e-04, forward_time=0.107, loss_ctc=71.120, loss_att=52.173, acc=0.708, loss=57.857, backward_time=0.750, grad_norm=96.519, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.225e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 23:41:28,953 (trainer:732) INFO: 20epoch:train:4801-4900batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=81.570, loss_att=58.452, acc=0.689, loss=65.387, backward_time=0.750, grad_norm=95.091, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.223e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 23:43:09,201 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 23:43:28,281 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 23:43:31,799 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 23:43:31,799 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 23:43:31,806 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 23:46:22,528 (trainer:732) INFO: 20epoch:train:4901-5000batch: iter_time=1.282, forward_time=0.108, loss_ctc=79.944, loss_att=59.404, acc=0.708, loss=65.566, backward_time=0.755, grad_norm=106.026, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.221e-05, train_time=5.871
+[gpua003:0/64] 2023-07-06 23:48:04,124 (trainer:732) INFO: 20epoch:train:5001-5100batch: iter_time=1.074e-04, forward_time=0.107, loss_ctc=73.293, loss_att=52.693, acc=0.711, loss=58.873, backward_time=0.757, grad_norm=83.888, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.219e-05, train_time=2.032
+[gpua003:0/64] 2023-07-06 23:49:44,402 (trainer:732) INFO: 20epoch:train:5101-5200batch: iter_time=1.044e-04, forward_time=0.106, loss_ctc=67.920, loss_att=49.443, acc=0.700, loss=54.986, backward_time=0.751, grad_norm=92.376, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.217e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 23:51:24,233 (trainer:732) INFO: 20epoch:train:5201-5300batch: iter_time=1.049e-04, forward_time=0.106, loss_ctc=77.875, loss_att=55.129, acc=0.680, loss=61.953, backward_time=0.751, grad_norm=110.700, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.214e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 23:53:04,201 (trainer:732) INFO: 20epoch:train:5301-5400batch: iter_time=1.111e-04, forward_time=0.107, loss_ctc=66.903, loss_att=50.769, acc=0.700, loss=55.609, backward_time=0.751, grad_norm=80.652, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.212e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 23:54:50,887 (trainer:732) INFO: 20epoch:train:5401-5500batch: iter_time=1.171e-04, forward_time=0.107, loss_ctc=73.965, loss_att=54.060, acc=0.695, loss=60.031, backward_time=0.764, grad_norm=92.000, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.210e-05, train_time=2.133
+[gpua003:0/64] 2023-07-06 23:56:33,008 (trainer:732) INFO: 20epoch:train:5501-5600batch: iter_time=1.043e-04, forward_time=0.108, loss_ctc=72.161, loss_att=57.079, acc=0.703, loss=61.603, backward_time=0.754, grad_norm=88.705, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.208e-05, train_time=2.042
+[gpua003:0/64] 2023-07-06 23:58:12,856 (trainer:732) INFO: 20epoch:train:5601-5700batch: iter_time=1.017e-04, forward_time=0.109, loss_ctc=76.027, loss_att=53.040, acc=0.686, loss=59.936, backward_time=0.753, grad_norm=101.954, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.205e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 23:59:52,835 (trainer:732) INFO: 20epoch:train:5701-5800batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=83.943, loss_att=63.565, acc=0.695, loss=69.678, backward_time=0.753, grad_norm=152.716, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.114, optim0_lr0=8.203e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 00:00:26,092 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-07 00:00:45,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 00:00:48,757 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 00:00:48,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-07 00:00:48,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 00:05:13,212 (trainer:732) INFO: 20epoch:train:5801-5900batch: iter_time=1.287, forward_time=0.108, loss_ctc=69.493, loss_att=50.272, acc=0.713, loss=56.039, backward_time=0.767, grad_norm=90.186, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.201e-05, train_time=6.407
+[gpua003:0/64] 2023-07-07 00:06:53,937 (trainer:732) INFO: 20epoch:train:5901-6000batch: iter_time=9.985e-05, forward_time=0.107, loss_ctc=67.619, loss_att=48.654, acc=0.710, loss=54.344, backward_time=0.755, grad_norm=80.945, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.199e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 00:08:34,265 (trainer:732) INFO: 20epoch:train:6001-6100batch: iter_time=9.513e-05, forward_time=0.107, loss_ctc=72.368, loss_att=52.436, acc=0.699, loss=58.415, backward_time=0.754, grad_norm=99.544, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.197e-05, train_time=2.006
+[gpua003:0/64] 2023-07-07 00:10:14,094 (trainer:732) INFO: 20epoch:train:6101-6200batch: iter_time=1.063e-04, forward_time=0.107, loss_ctc=72.573, loss_att=53.094, acc=0.693, loss=58.938, backward_time=0.752, grad_norm=96.528, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.194e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 00:11:54,093 (trainer:732) INFO: 20epoch:train:6201-6300batch: iter_time=9.448e-05, forward_time=0.108, loss_ctc=67.484, loss_att=50.808, acc=0.704, loss=55.811, backward_time=0.753, grad_norm=108.855, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.192e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 00:13:34,349 (trainer:732) INFO: 20epoch:train:6301-6400batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=72.464, loss_att=56.785, acc=0.702, loss=61.489, backward_time=0.753, grad_norm=87.129, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.190e-05, train_time=2.005
+[gpua003:0/64] 2023-07-07 00:15:14,312 (trainer:732) INFO: 20epoch:train:6401-6500batch: iter_time=9.419e-05, forward_time=0.108, loss_ctc=71.280, loss_att=50.161, acc=0.709, loss=56.497, backward_time=0.754, grad_norm=98.013, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.188e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 00:16:54,147 (trainer:732) INFO: 20epoch:train:6501-6600batch: iter_time=1.010e-04, forward_time=0.107, loss_ctc=90.171, loss_att=63.851, acc=0.685, loss=71.747, backward_time=0.753, grad_norm=108.463, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.186e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 00:18:00,433 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-07 00:18:19,726 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 00:18:23,276 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 00:18:23,276 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-07 00:18:23,282 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 00:22:45,396 (trainer:732) INFO: 20epoch:train:6601-6700batch: iter_time=1.296, forward_time=0.108, loss_ctc=74.059, loss_att=56.550, acc=0.707, loss=61.803, backward_time=0.761, grad_norm=91.757, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.183e-05, train_time=7.025
+[gpua003:0/64] 2023-07-07 00:24:26,019 (trainer:732) INFO: 20epoch:train:6701-6800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=69.347, loss_att=50.947, acc=0.699, loss=56.467, backward_time=0.755, grad_norm=89.501, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.181e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 00:26:06,137 (trainer:732) INFO: 20epoch:train:6801-6900batch: iter_time=1.227e-04, forward_time=0.107, loss_ctc=70.173, loss_att=49.974, acc=0.698, loss=56.034, backward_time=0.751, grad_norm=91.982, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.179e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 00:27:45,899 (trainer:732) INFO: 20epoch:train:6901-7000batch: iter_time=1.212e-04, forward_time=0.108, loss_ctc=71.799, loss_att=52.784, acc=0.684, loss=58.488, backward_time=0.752, grad_norm=105.648, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.177e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 00:29:25,719 (trainer:732) INFO: 20epoch:train:7001-7100batch: iter_time=1.125e-04, forward_time=0.107, loss_ctc=69.357, loss_att=53.933, acc=0.692, loss=58.560, backward_time=0.752, grad_norm=90.499, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.175e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 00:31:05,462 (trainer:732) INFO: 20epoch:train:7101-7200batch: iter_time=1.086e-04, forward_time=0.108, loss_ctc=68.020, loss_att=53.832, acc=0.699, loss=58.088, backward_time=0.752, grad_norm=108.903, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.173e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 00:32:45,164 (trainer:732) INFO: 20epoch:train:7201-7300batch: iter_time=1.177e-04, forward_time=0.108, loss_ctc=71.562, loss_att=52.820, acc=0.698, loss=58.443, backward_time=0.753, grad_norm=92.492, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.170e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 00:34:25,792 (trainer:732) INFO: 20epoch:train:7301-7400batch: iter_time=1.149e-04, forward_time=0.108, loss_ctc=84.374, loss_att=58.985, acc=0.686, loss=66.602, backward_time=0.752, grad_norm=103.222, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.168e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 00:36:05,793 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua003:0/64] 2023-07-07 00:36:24,910 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 00:36:28,439 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 00:36:28,439 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-07 00:36:28,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 00:39:39,875 (trainer:732) INFO: 20epoch:train:7401-7500batch: iter_time=1.300, forward_time=0.108, loss_ctc=82.541, loss_att=62.936, acc=0.691, loss=68.817, backward_time=0.758, grad_norm=115.515, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.166e-05, train_time=6.281
+[gpua003:0/64] 2023-07-07 00:41:23,429 (trainer:732) INFO: 20epoch:train:7501-7600batch: iter_time=9.637e-05, forward_time=0.109, loss_ctc=69.225, loss_att=48.998, acc=0.720, loss=55.066, backward_time=0.763, grad_norm=96.997, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.164e-05, train_time=2.071
+[gpua003:0/64] 2023-07-07 00:43:03,727 (trainer:732) INFO: 20epoch:train:7601-7700batch: iter_time=1.074e-04, forward_time=0.107, loss_ctc=74.248, loss_att=54.779, acc=0.704, loss=60.620, backward_time=0.752, grad_norm=92.841, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.162e-05, train_time=2.006
+[gpua003:0/64] 2023-07-07 00:44:44,287 (trainer:732) INFO: 20epoch:train:7701-7800batch: iter_time=1.079e-04, forward_time=0.107, loss_ctc=73.724, loss_att=51.407, acc=0.683, loss=58.102, backward_time=0.753, grad_norm=97.431, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.159e-05, train_time=2.011
+[gpua003:0/64] 2023-07-07 00:46:24,185 (trainer:732) INFO: 20epoch:train:7801-7900batch: iter_time=9.605e-05, forward_time=0.108, loss_ctc=69.983, loss_att=56.359, acc=0.700, loss=60.446, backward_time=0.753, grad_norm=78.300, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.157e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 00:48:04,263 (trainer:732) INFO: 20epoch:train:7901-8000batch: iter_time=9.780e-05, forward_time=0.109, loss_ctc=71.676, loss_att=53.364, acc=0.698, loss=58.857, backward_time=0.754, grad_norm=93.587, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.155e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 00:49:45,962 (trainer:732) INFO: 20epoch:train:8001-8100batch: iter_time=9.342e-05, forward_time=0.108, loss_ctc=67.152, loss_att=50.735, acc=0.707, loss=55.660, backward_time=0.754, grad_norm=96.944, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.153e-05, train_time=2.034
+[gpua003:0/64] 2023-07-07 00:51:26,537 (trainer:732) INFO: 20epoch:train:8101-8200batch: iter_time=9.864e-05, forward_time=0.108, loss_ctc=81.176, loss_att=58.498, acc=0.696, loss=65.302, backward_time=0.753, grad_norm=108.921, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.151e-05, train_time=2.011
+[gpua003:0/64] 2023-07-07 00:53:07,235 (trainer:732) INFO: 20epoch:train:8201-8300batch: iter_time=1.006e-04, forward_time=0.108, loss_ctc=79.556, loss_att=61.957, acc=0.691, loss=67.237, backward_time=0.752, grad_norm=109.467, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.149e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 00:53:41,518 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-07 00:54:00,863 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 00:54:04,680 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 00:54:04,680 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-07 00:54:04,686 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 00:58:25,770 (trainer:732) INFO: 20epoch:train:8301-8400batch: iter_time=1.273, forward_time=0.108, loss_ctc=78.310, loss_att=60.461, acc=0.698, loss=65.816, backward_time=0.767, grad_norm=117.559, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.146e-05, train_time=6.370
+[gpua003:0/64] 2023-07-07 01:00:07,312 (trainer:732) INFO: 20epoch:train:8401-8500batch: iter_time=1.152e-04, forward_time=0.108, loss_ctc=67.962, loss_att=49.896, acc=0.703, loss=55.316, backward_time=0.754, grad_norm=90.877, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.114, optim0_lr0=8.144e-05, train_time=2.031
+[gpua003:0/64] 2023-07-07 01:01:48,019 (trainer:732) INFO: 20epoch:train:8501-8600batch: iter_time=1.031e-04, forward_time=0.110, loss_ctc=68.584, loss_att=49.176, acc=0.698, loss=54.998, backward_time=0.755, grad_norm=93.992, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.142e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 01:03:36,281 (trainer:732) INFO: 20epoch:train:8601-8700batch: iter_time=9.241e-05, forward_time=0.108, loss_ctc=75.435, loss_att=53.797, acc=0.684, loss=60.288, backward_time=0.759, grad_norm=99.257, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.140e-05, train_time=2.165
+[gpua003:0/64] 2023-07-07 01:05:22,596 (trainer:732) INFO: 20epoch:train:8701-8800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=69.540, loss_att=53.032, acc=0.705, loss=57.984, backward_time=0.759, grad_norm=88.332, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.138e-05, train_time=2.126
+[gpua003:0/64] 2023-07-07 01:07:02,503 (trainer:732) INFO: 20epoch:train:8801-8900batch: iter_time=1.033e-04, forward_time=0.108, loss_ctc=71.565, loss_att=57.189, acc=0.691, loss=61.502, backward_time=0.751, grad_norm=90.049, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.136e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 01:08:47,374 (trainer:732) INFO: 20epoch:train:8901-9000batch: iter_time=9.529e-05, forward_time=0.108, loss_ctc=74.747, loss_att=50.418, acc=0.695, loss=57.717, backward_time=0.757, grad_norm=98.679, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.134e-05, train_time=2.097
+[gpua003:0/64] 2023-07-07 01:10:27,736 (trainer:732) INFO: 20epoch:train:9001-9100batch: iter_time=9.856e-05, forward_time=0.107, loss_ctc=83.548, loss_att=63.248, acc=0.685, loss=69.338, backward_time=0.751, grad_norm=111.462, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.131e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 01:11:37,029 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-07 01:11:56,346 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 01:12:00,135 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 01:12:00,135 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-07 01:12:00,141 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 01:16:34,776 (trainer:732) INFO: 20epoch:train:9101-9200batch: iter_time=1.308, forward_time=0.108, loss_ctc=72.185, loss_att=56.405, acc=0.695, loss=61.139, backward_time=0.773, grad_norm=115.543, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.129e-05, train_time=7.341
+[gpua003:0/64] 2023-07-07 01:18:15,925 (trainer:732) INFO: 20epoch:train:9201-9300batch: iter_time=9.792e-05, forward_time=0.106, loss_ctc=71.745, loss_att=52.813, acc=0.704, loss=58.492, backward_time=0.754, grad_norm=89.778, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.127e-05, train_time=2.023
+[gpua003:0/64] 2023-07-07 01:19:59,197 (trainer:732) INFO: 20epoch:train:9301-9400batch: iter_time=9.737e-05, forward_time=0.107, loss_ctc=69.353, loss_att=48.395, acc=0.713, loss=54.682, backward_time=0.753, grad_norm=88.881, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.125e-05, train_time=2.065
+[gpua003:0/64] 2023-07-07 01:21:40,857 (trainer:732) INFO: 20epoch:train:9401-9500batch: iter_time=1.001e-04, forward_time=0.107, loss_ctc=72.030, loss_att=51.469, acc=0.684, loss=57.637, backward_time=0.753, grad_norm=97.051, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.123e-05, train_time=2.033
+[gpua003:0/64] 2023-07-07 01:23:21,684 (trainer:732) INFO: 20epoch:train:9501-9600batch: iter_time=9.299e-05, forward_time=0.107, loss_ctc=71.994, loss_att=56.267, acc=0.690, loss=60.985, backward_time=0.753, grad_norm=91.397, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.121e-05, train_time=2.016
+[gpua003:0/64] 2023-07-07 01:25:01,632 (trainer:732) INFO: 20epoch:train:9601-9700batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=70.203, loss_att=53.311, acc=0.696, loss=58.379, backward_time=0.753, grad_norm=104.136, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.118e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 01:26:41,518 (trainer:732) INFO: 20epoch:train:9701-9800batch: iter_time=8.494e-05, forward_time=0.107, loss_ctc=70.139, loss_att=51.677, acc=0.703, loss=57.216, backward_time=0.753, grad_norm=87.042, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.116e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 01:28:24,627 (trainer:732) INFO: 20epoch:train:9801-9900batch: iter_time=9.340e-05, forward_time=0.107, loss_ctc=82.680, loss_att=58.302, acc=0.679, loss=65.616, backward_time=0.763, grad_norm=95.875, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.114e-05, train_time=2.062
+[gpua003:0/64] 2023-07-07 01:30:09,616 (trainer:732) INFO: 20epoch:train:9901-10000batch: iter_time=9.389e-05, forward_time=0.107, loss_ctc=77.669, loss_att=57.909, acc=0.701, loss=63.837, backward_time=0.757, grad_norm=107.681, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.112e-05, train_time=2.100
+[gpua003:0/64] 2023-07-07 01:42:21,427 (trainer:338) INFO: 20epoch results: [train] iter_time=0.196, forward_time=0.108, loss_ctc=73.961, loss_att=54.526, acc=0.696, loss=60.357, backward_time=0.755, grad_norm=96.055, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.221e-05, train_time=2.596, time=3 hours, 36 minutes and 32.76 seconds, total_count=170000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=49.387, cer_ctc=0.283, loss_att=42.558, acc=0.643, cer=0.424, wer=0.999, loss=44.607, time=5 minutes and 50.92 seconds, total_count=17710, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 11.77 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-07 01:42:37,043 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-07 01:42:37,073 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till20epoch.pth
+[gpua003:0/64] 2023-07-07 01:43:32,066 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till20epoch.pth
+[gpua003:0/64] 2023-07-07 01:43:57,407 (trainer:272) INFO: 21/100epoch started. Estimated time to finish: 1 week, 5 days and 21 hours
+[gpua003:0/64] 2023-07-07 01:43:58,913 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-07 01:44:18,582 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 01:44:24,346 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 01:44:24,346 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-07 01:44:24,417 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 01:50:53,031 (trainer:732) INFO: 21epoch:train:1-100batch: iter_time=3.069, forward_time=0.137, loss_ctc=75.425, loss_att=60.964, acc=0.696, loss=65.302, backward_time=0.770, grad_norm=105.263, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.117, optim0_lr0=8.110e-05, train_time=8.297
+[gpua003:0/64] 2023-07-07 01:52:34,428 (trainer:732) INFO: 21epoch:train:101-200batch: iter_time=1.033e-04, forward_time=0.109, loss_ctc=66.860, loss_att=53.856, acc=0.695, loss=57.757, backward_time=0.754, grad_norm=87.309, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.108e-05, train_time=2.028
+[gpua003:0/64] 2023-07-07 01:54:16,169 (trainer:732) INFO: 21epoch:train:201-300batch: iter_time=1.017e-04, forward_time=0.108, loss_ctc=68.585, loss_att=49.706, acc=0.718, loss=55.369, backward_time=0.751, grad_norm=89.606, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.106e-05, train_time=2.035
+[gpua003:0/64] 2023-07-07 01:55:56,901 (trainer:732) INFO: 21epoch:train:301-400batch: iter_time=9.549e-05, forward_time=0.109, loss_ctc=83.868, loss_att=59.563, acc=0.677, loss=66.854, backward_time=0.751, grad_norm=118.007, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.104e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 01:57:38,518 (trainer:732) INFO: 21epoch:train:401-500batch: iter_time=8.947e-05, forward_time=0.108, loss_ctc=75.494, loss_att=55.952, acc=0.710, loss=61.815, backward_time=0.753, grad_norm=96.012, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.101e-05, train_time=2.032
+[gpua003:0/64] 2023-07-07 01:59:25,089 (trainer:732) INFO: 21epoch:train:501-600batch: iter_time=8.360e-05, forward_time=0.107, loss_ctc=77.597, loss_att=58.124, acc=0.678, loss=63.966, backward_time=0.760, grad_norm=95.102, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.099e-05, train_time=2.131
+[gpua003:0/64] 2023-07-07 02:01:14,358 (trainer:732) INFO: 21epoch:train:601-700batch: iter_time=8.784e-05, forward_time=0.108, loss_ctc=79.901, loss_att=58.953, acc=0.688, loss=65.237, backward_time=0.766, grad_norm=109.677, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.097e-05, train_time=2.185
+[gpua003:0/64] 2023-07-07 02:03:06,286 (trainer:732) INFO: 21epoch:train:701-800batch: iter_time=8.960e-05, forward_time=0.108, loss_ctc=76.140, loss_att=52.512, acc=0.702, loss=59.600, backward_time=0.764, grad_norm=108.446, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.095e-05, train_time=2.238
+[gpua003:0/64] 2023-07-07 02:03:45,470 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-07 02:04:04,788 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 02:04:08,666 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 02:04:08,666 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-07 02:04:08,672 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 02:08:05,629 (trainer:732) INFO: 21epoch:train:801-900batch: iter_time=1.390, forward_time=0.152, loss_ctc=78.626, loss_att=63.039, acc=0.675, loss=67.715, backward_time=0.773, grad_norm=107.084, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.115, optim0_lr0=8.093e-05, train_time=5.987
+[gpua003:0/64] 2023-07-07 02:09:45,991 (trainer:732) INFO: 21epoch:train:901-1000batch: iter_time=9.686e-05, forward_time=0.109, loss_ctc=65.698, loss_att=52.821, acc=0.694, loss=56.684, backward_time=0.754, grad_norm=87.102, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.091e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 02:11:25,991 (trainer:732) INFO: 21epoch:train:1001-1100batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=70.095, loss_att=51.641, acc=0.713, loss=57.177, backward_time=0.751, grad_norm=85.639, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.089e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 02:13:05,844 (trainer:732) INFO: 21epoch:train:1101-1200batch: iter_time=1.030e-04, forward_time=0.108, loss_ctc=71.874, loss_att=52.739, acc=0.696, loss=58.480, backward_time=0.752, grad_norm=119.352, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.087e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 02:14:45,543 (trainer:732) INFO: 21epoch:train:1201-1300batch: iter_time=9.500e-05, forward_time=0.108, loss_ctc=79.532, loss_att=61.570, acc=0.679, loss=66.959, backward_time=0.750, grad_norm=83.751, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.114, optim0_lr0=8.084e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 02:16:25,354 (trainer:732) INFO: 21epoch:train:1301-1400batch: iter_time=9.425e-05, forward_time=0.108, loss_ctc=70.489, loss_att=51.580, acc=0.691, loss=57.252, backward_time=0.751, grad_norm=89.038, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.082e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 02:18:05,983 (trainer:732) INFO: 21epoch:train:1401-1500batch: iter_time=9.711e-05, forward_time=0.109, loss_ctc=79.242, loss_att=59.276, acc=0.681, loss=65.266, backward_time=0.754, grad_norm=99.915, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.080e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 02:19:45,694 (trainer:732) INFO: 21epoch:train:1501-1600batch: iter_time=1.032e-04, forward_time=0.107, loss_ctc=86.448, loss_att=59.634, acc=0.687, loss=67.678, backward_time=0.752, grad_norm=119.579, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.078e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 02:21:03,476 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-07 02:21:22,929 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 02:21:26,897 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 02:21:26,897 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-07 02:21:26,904 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 02:26:35,736 (trainer:732) INFO: 21epoch:train:1601-1700batch: iter_time=3.035, forward_time=0.137, loss_ctc=74.922, loss_att=57.634, acc=0.681, loss=62.820, backward_time=0.764, grad_norm=103.929, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.115, optim0_lr0=8.076e-05, train_time=8.200
+[gpua003:0/64] 2023-07-07 02:28:16,442 (trainer:732) INFO: 21epoch:train:1701-1800batch: iter_time=9.394e-05, forward_time=0.110, loss_ctc=67.692, loss_att=51.540, acc=0.720, loss=56.386, backward_time=0.754, grad_norm=84.127, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.074e-05, train_time=2.015
+[gpua003:0/64] 2023-07-07 02:29:56,377 (trainer:732) INFO: 21epoch:train:1801-1900batch: iter_time=9.295e-05, forward_time=0.109, loss_ctc=67.942, loss_att=54.566, acc=0.708, loss=58.578, backward_time=0.753, grad_norm=95.960, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.072e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 02:31:36,002 (trainer:732) INFO: 21epoch:train:1901-2000batch: iter_time=8.542e-05, forward_time=0.108, loss_ctc=62.560, loss_att=45.274, acc=0.709, loss=50.460, backward_time=0.750, grad_norm=78.689, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.070e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 02:33:15,755 (trainer:732) INFO: 21epoch:train:2001-2100batch: iter_time=8.779e-05, forward_time=0.108, loss_ctc=83.888, loss_att=62.880, acc=0.695, loss=69.182, backward_time=0.752, grad_norm=111.465, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.068e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 02:34:55,601 (trainer:732) INFO: 21epoch:train:2101-2200batch: iter_time=9.295e-05, forward_time=0.108, loss_ctc=71.779, loss_att=53.269, acc=0.708, loss=58.822, backward_time=0.753, grad_norm=78.300, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.066e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 02:36:35,519 (trainer:732) INFO: 21epoch:train:2201-2300batch: iter_time=8.719e-05, forward_time=0.109, loss_ctc=77.907, loss_att=57.920, acc=0.681, loss=63.916, backward_time=0.753, grad_norm=94.141, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.063e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 02:38:15,578 (trainer:732) INFO: 21epoch:train:2301-2400batch: iter_time=1.077e-04, forward_time=0.109, loss_ctc=77.449, loss_att=56.410, acc=0.699, loss=62.722, backward_time=0.751, grad_norm=114.357, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.061e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 02:40:15,411 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-07 02:40:34,895 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 02:40:38,734 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 02:40:38,734 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-07 02:40:38,741 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 02:43:57,355 (trainer:732) INFO: 21epoch:train:2401-2500batch: iter_time=1.755, forward_time=0.116, loss_ctc=73.960, loss_att=51.538, acc=0.693, loss=58.265, backward_time=0.768, grad_norm=101.777, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.059e-05, train_time=6.835
+[gpua003:0/64] 2023-07-07 02:45:38,947 (trainer:732) INFO: 21epoch:train:2501-2600batch: iter_time=1.017e-04, forward_time=0.107, loss_ctc=73.796, loss_att=60.954, acc=0.696, loss=64.807, backward_time=0.760, grad_norm=102.406, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.057e-05, train_time=2.032
+[gpua003:0/64] 2023-07-07 02:47:21,878 (trainer:732) INFO: 21epoch:train:2601-2700batch: iter_time=1.022e-04, forward_time=0.107, loss_ctc=65.924, loss_att=51.785, acc=0.701, loss=56.026, backward_time=0.752, grad_norm=94.190, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.055e-05, train_time=2.058
+[gpua003:0/64] 2023-07-07 02:49:01,878 (trainer:732) INFO: 21epoch:train:2701-2800batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=68.630, loss_att=49.530, acc=0.716, loss=55.260, backward_time=0.752, grad_norm=96.790, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.053e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 02:50:43,603 (trainer:732) INFO: 21epoch:train:2801-2900batch: iter_time=1.010e-04, forward_time=0.109, loss_ctc=78.316, loss_att=57.443, acc=0.680, loss=63.705, backward_time=0.757, grad_norm=110.859, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.051e-05, train_time=2.034
+[gpua003:0/64] 2023-07-07 02:52:23,596 (trainer:732) INFO: 21epoch:train:2901-3000batch: iter_time=9.699e-05, forward_time=0.110, loss_ctc=74.531, loss_att=55.316, acc=0.713, loss=61.081, backward_time=0.754, grad_norm=91.694, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.049e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 02:54:03,277 (trainer:732) INFO: 21epoch:train:3001-3100batch: iter_time=1.094e-04, forward_time=0.108, loss_ctc=72.826, loss_att=52.947, acc=0.686, loss=58.911, backward_time=0.751, grad_norm=96.991, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.047e-05, train_time=1.993
+[gpua003:0/64] 2023-07-07 02:55:45,673 (trainer:732) INFO: 21epoch:train:3101-3200batch: iter_time=1.044e-04, forward_time=0.128, loss_ctc=79.741, loss_att=59.681, acc=0.688, loss=65.699, backward_time=0.753, grad_norm=105.684, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.045e-05, train_time=2.048
+[gpua003:0/64] 2023-07-07 02:57:26,045 (trainer:732) INFO: 21epoch:train:3201-3300batch: iter_time=4.703e-04, forward_time=0.110, loss_ctc=72.892, loss_att=50.604, acc=0.704, loss=57.290, backward_time=0.750, grad_norm=98.215, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.043e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 02:57:59,564 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-07 02:58:19,306 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 02:58:23,210 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 02:58:23,210 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-07 02:58:23,216 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 03:04:26,311 (trainer:732) INFO: 21epoch:train:3301-3400batch: iter_time=1.411, forward_time=0.113, loss_ctc=70.531, loss_att=53.598, acc=0.698, loss=58.678, backward_time=0.765, grad_norm=95.913, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.040e-05, train_time=8.405
+[gpua003:0/64] 2023-07-07 03:06:06,849 (trainer:732) INFO: 21epoch:train:3401-3500batch: iter_time=1.204e-04, forward_time=0.109, loss_ctc=66.577, loss_att=55.002, acc=0.685, loss=58.474, backward_time=0.753, grad_norm=97.139, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.038e-05, train_time=2.011
+[gpua003:0/64] 2023-07-07 03:07:46,882 (trainer:732) INFO: 21epoch:train:3501-3600batch: iter_time=1.055e-04, forward_time=0.108, loss_ctc=68.566, loss_att=48.282, acc=0.717, loss=54.367, backward_time=0.754, grad_norm=84.774, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.036e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 03:09:26,695 (trainer:732) INFO: 21epoch:train:3601-3700batch: iter_time=1.129e-04, forward_time=0.109, loss_ctc=76.925, loss_att=57.566, acc=0.683, loss=63.374, backward_time=0.753, grad_norm=117.953, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.034e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 03:11:06,310 (trainer:732) INFO: 21epoch:train:3701-3800batch: iter_time=1.180e-04, forward_time=0.107, loss_ctc=70.698, loss_att=53.316, acc=0.704, loss=58.530, backward_time=0.751, grad_norm=94.370, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.032e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 03:12:46,196 (trainer:732) INFO: 21epoch:train:3801-3900batch: iter_time=1.218e-04, forward_time=0.109, loss_ctc=74.676, loss_att=54.150, acc=0.689, loss=60.307, backward_time=0.752, grad_norm=84.395, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.030e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 03:14:25,952 (trainer:732) INFO: 21epoch:train:3901-4000batch: iter_time=9.952e-05, forward_time=0.107, loss_ctc=79.508, loss_att=60.374, acc=0.682, loss=66.114, backward_time=0.752, grad_norm=109.277, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.028e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 03:16:05,888 (trainer:732) INFO: 21epoch:train:4001-4100batch: iter_time=8.976e-05, forward_time=0.108, loss_ctc=76.068, loss_att=53.950, acc=0.697, loss=60.586, backward_time=0.754, grad_norm=109.443, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.026e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 03:17:13,032 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-07 03:17:32,119 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 03:17:35,672 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 03:17:35,672 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-07 03:17:35,678 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 03:21:24,869 (trainer:732) INFO: 21epoch:train:4101-4200batch: iter_time=2.108, forward_time=0.163, loss_ctc=73.138, loss_att=59.185, acc=0.677, loss=63.371, backward_time=0.766, grad_norm=100.088, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.115, optim0_lr0=8.024e-05, train_time=6.378
+[gpua003:0/64] 2023-07-07 03:23:05,523 (trainer:732) INFO: 21epoch:train:4201-4300batch: iter_time=9.623e-05, forward_time=0.109, loss_ctc=67.172, loss_att=52.075, acc=0.720, loss=56.604, backward_time=0.755, grad_norm=80.071, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.022e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 03:24:45,805 (trainer:732) INFO: 21epoch:train:4301-4400batch: iter_time=1.166e-04, forward_time=0.111, loss_ctc=66.732, loss_att=52.975, acc=0.711, loss=57.102, backward_time=0.754, grad_norm=109.947, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.020e-05, train_time=2.005
+[gpua003:0/64] 2023-07-07 03:26:25,913 (trainer:732) INFO: 21epoch:train:4401-4500batch: iter_time=1.080e-04, forward_time=0.110, loss_ctc=63.186, loss_att=44.532, acc=0.715, loss=50.128, backward_time=0.755, grad_norm=80.740, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.018e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 03:28:05,766 (trainer:732) INFO: 21epoch:train:4501-4600batch: iter_time=1.226e-04, forward_time=0.110, loss_ctc=82.770, loss_att=62.550, acc=0.696, loss=68.616, backward_time=0.753, grad_norm=110.536, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.016e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 03:29:48,662 (trainer:732) INFO: 21epoch:train:4601-4700batch: iter_time=1.171e-04, forward_time=0.111, loss_ctc=71.738, loss_att=52.977, acc=0.712, loss=58.605, backward_time=0.757, grad_norm=88.053, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.014e-05, train_time=2.058
+[gpua003:0/64] 2023-07-07 03:31:28,763 (trainer:732) INFO: 21epoch:train:4701-4800batch: iter_time=1.118e-04, forward_time=0.110, loss_ctc=75.673, loss_att=54.196, acc=0.695, loss=60.639, backward_time=0.754, grad_norm=90.929, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.011e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 03:33:08,783 (trainer:732) INFO: 21epoch:train:4801-4900batch: iter_time=1.194e-04, forward_time=0.110, loss_ctc=76.080, loss_att=56.995, acc=0.697, loss=62.720, backward_time=0.755, grad_norm=119.887, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.009e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 03:34:49,915 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-07 03:35:09,166 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 03:35:12,753 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 03:35:12,753 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-07 03:35:12,760 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 03:39:22,173 (trainer:732) INFO: 21epoch:train:4901-5000batch: iter_time=1.327, forward_time=0.110, loss_ctc=72.216, loss_att=51.391, acc=0.693, loss=57.639, backward_time=0.762, grad_norm=97.793, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.007e-05, train_time=7.468
+[gpua003:0/64] 2023-07-07 03:41:06,488 (trainer:732) INFO: 21epoch:train:5001-5100batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=70.415, loss_att=54.270, acc=0.710, loss=59.113, backward_time=0.761, grad_norm=92.209, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.005e-05, train_time=2.086
+[gpua003:0/64] 2023-07-07 03:42:46,655 (trainer:732) INFO: 21epoch:train:5101-5200batch: iter_time=9.585e-05, forward_time=0.109, loss_ctc=64.440, loss_att=51.288, acc=0.697, loss=55.233, backward_time=0.754, grad_norm=91.829, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.003e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 03:44:26,435 (trainer:732) INFO: 21epoch:train:5201-5300batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=66.016, loss_att=47.983, acc=0.711, loss=53.393, backward_time=0.753, grad_norm=82.415, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.001e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 03:46:06,273 (trainer:732) INFO: 21epoch:train:5301-5400batch: iter_time=1.066e-04, forward_time=0.109, loss_ctc=79.568, loss_att=58.274, acc=0.689, loss=64.662, backward_time=0.752, grad_norm=101.250, clip=100.000,
loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.999e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:47:45,977 (trainer:732) INFO: 21epoch:train:5401-5500batch: iter_time=9.211e-05, forward_time=0.108, loss_ctc=77.093, loss_att=57.211, acc=0.708, loss=63.176, backward_time=0.752, grad_norm=88.698, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.997e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 03:49:25,739 (trainer:732) INFO: 21epoch:train:5501-5600batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=70.776, loss_att=51.574, acc=0.689, loss=57.334, backward_time=0.752, grad_norm=92.327, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.995e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 03:51:05,561 (trainer:732) INFO: 21epoch:train:5601-5700batch: iter_time=9.823e-05, forward_time=0.109, loss_ctc=78.570, loss_att=59.212, acc=0.684, loss=65.020, backward_time=0.752, grad_norm=102.663, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.993e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 03:52:45,407 (trainer:732) INFO: 21epoch:train:5701-5800batch: iter_time=1.062e-04, forward_time=0.108, loss_ctc=74.565, loss_att=51.158, acc=0.691, loss=58.180, backward_time=0.753, grad_norm=101.119, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.991e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:53:20,039 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-07 03:53:39,181 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 03:53:42,818 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 03:53:42,818 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 03:53:42,824 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:58:59,477 (trainer:732) INFO: 21epoch:train:5801-5900batch: iter_time=1.377, forward_time=0.109, loss_ctc=68.222, loss_att=54.113, acc=0.702, loss=58.346, backward_time=0.768, grad_norm=85.484, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.989e-05, train_time=7.481 +[gpua003:0/64] 2023-07-07 04:00:40,031 (trainer:732) INFO: 21epoch:train:5901-6000batch: iter_time=9.701e-05, forward_time=0.108, loss_ctc=67.129, loss_att=54.573, acc=0.697, loss=58.340, backward_time=0.753, grad_norm=88.099, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.987e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 04:02:20,207 (trainer:732) INFO: 21epoch:train:6001-6100batch: iter_time=9.362e-05, forward_time=0.109, loss_ctc=68.337, loss_att=48.416, acc=0.728, loss=54.392, backward_time=0.752, grad_norm=86.732, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.985e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 04:04:01,464 (trainer:732) INFO: 21epoch:train:6101-6200batch: iter_time=9.073e-05, 
forward_time=0.109, loss_ctc=75.111, loss_att=55.799, acc=0.695, loss=61.593, backward_time=0.761, grad_norm=94.382, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.983e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 04:05:41,365 (trainer:732) INFO: 21epoch:train:6201-6300batch: iter_time=9.518e-05, forward_time=0.109, loss_ctc=69.408, loss_att=51.348, acc=0.719, loss=56.766, backward_time=0.752, grad_norm=91.702, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.981e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 04:07:21,149 (trainer:732) INFO: 21epoch:train:6301-6400batch: iter_time=9.310e-05, forward_time=0.109, loss_ctc=72.354, loss_att=53.860, acc=0.695, loss=59.408, backward_time=0.752, grad_norm=82.091, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.979e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 04:09:01,102 (trainer:732) INFO: 21epoch:train:6401-6500batch: iter_time=9.926e-05, forward_time=0.109, loss_ctc=78.032, loss_att=54.051, acc=0.697, loss=61.245, backward_time=0.753, grad_norm=110.132, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.977e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 04:10:40,790 (trainer:732) INFO: 21epoch:train:6501-6600batch: iter_time=9.807e-05, forward_time=0.107, loss_ctc=78.055, loss_att=56.847, acc=0.705, loss=63.209, backward_time=0.752, grad_norm=115.245, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.975e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 04:11:49,314 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-07 04:12:08,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 04:12:11,990 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 04:12:11,990 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 04:12:11,997 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 04:15:49,294 (trainer:732) INFO: 21epoch:train:6601-6700batch: iter_time=1.324, forward_time=0.109, loss_ctc=74.801, loss_att=60.444, acc=0.679, loss=64.751, backward_time=0.766, grad_norm=105.116, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.973e-05, train_time=6.170 +[gpua003:0/64] 2023-07-07 04:17:31,076 (trainer:732) INFO: 21epoch:train:6701-6800batch: iter_time=9.790e-05, forward_time=0.111, loss_ctc=67.289, loss_att=50.540, acc=0.720, loss=55.565, backward_time=0.757, grad_norm=81.960, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.971e-05, train_time=2.035 +[gpua003:0/64] 2023-07-07 04:19:10,966 (trainer:732) INFO: 21epoch:train:6801-6900batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=67.753, loss_att=53.586, acc=0.710, loss=57.836, backward_time=0.751, grad_norm=85.772, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.969e-05, 
train_time=1.998 +[gpua003:0/64] 2023-07-07 04:20:51,192 (trainer:732) INFO: 21epoch:train:6901-7000batch: iter_time=1.021e-04, forward_time=0.108, loss_ctc=62.269, loss_att=43.393, acc=0.718, loss=49.056, backward_time=0.752, grad_norm=94.628, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.967e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 04:22:31,278 (trainer:732) INFO: 21epoch:train:7001-7100batch: iter_time=1.125e-04, forward_time=0.109, loss_ctc=82.553, loss_att=62.057, acc=0.701, loss=68.206, backward_time=0.752, grad_norm=96.053, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.965e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 04:24:11,011 (trainer:732) INFO: 21epoch:train:7101-7200batch: iter_time=9.769e-05, forward_time=0.109, loss_ctc=73.617, loss_att=53.857, acc=0.710, loss=59.785, backward_time=0.752, grad_norm=106.016, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.963e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 04:25:51,930 (trainer:732) INFO: 21epoch:train:7201-7300batch: iter_time=2.744e-04, forward_time=0.119, loss_ctc=71.583, loss_att=52.769, acc=0.697, loss=58.413, backward_time=0.752, grad_norm=92.085, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.961e-05, train_time=2.018 +[gpua003:0/64] 2023-07-07 04:27:31,763 (trainer:732) INFO: 21epoch:train:7301-7400batch: iter_time=9.744e-05, forward_time=0.109, loss_ctc=78.262, loss_att=56.400, acc=0.700, loss=62.958, backward_time=0.753, grad_norm=108.333, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.959e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 04:29:11,616 (trainer:732) INFO: 21epoch:train:7401-7500batch: iter_time=9.512e-05, forward_time=0.109, loss_ctc=72.795, loss_att=51.382, acc=0.699, loss=57.806, backward_time=0.752, grad_norm=106.912, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.957e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 04:29:19,879 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-07 04:29:39,014 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 04:29:44,220 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 04:29:44,313 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-07 04:29:44,320 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 04:34:49,745 (trainer:732) INFO: 21epoch:train:7501-7600batch: iter_time=1.925, forward_time=0.158, loss_ctc=69.045, loss_att=53.787, acc=0.717, loss=58.364, backward_time=0.770, grad_norm=91.648, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.116, optim0_lr0=7.955e-05, train_time=6.762
+[gpua003:0/64] 2023-07-07 04:36:30,383 (trainer:732) INFO: 21epoch:train:7601-7700batch: iter_time=9.981e-05, forward_time=0.109, loss_ctc=65.367, loss_att=51.408, acc=0.707, loss=55.596, backward_time=0.753, grad_norm=96.250, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.952e-05, train_time=2.013
+[gpua003:0/64] 2023-07-07 04:38:11,636 (trainer:732) INFO: 21epoch:train:7701-7800batch: iter_time=1.030e-04, forward_time=0.110, loss_ctc=65.290, loss_att=46.653, acc=0.718, loss=52.244, backward_time=0.752, grad_norm=97.699, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.950e-05, train_time=2.025
+[gpua003:0/64] 2023-07-07 04:39:52,657 (trainer:732) INFO: 21epoch:train:7801-7900batch: iter_time=9.248e-05, forward_time=0.116, loss_ctc=79.587, loss_att=57.577, acc=0.697, loss=64.180, backward_time=0.752, grad_norm=90.219, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.948e-05, train_time=2.020
+[gpua003:0/64] 2023-07-07 04:41:35,649 (trainer:732) INFO: 21epoch:train:7901-8000batch: iter_time=6.759e-04, forward_time=0.130, loss_ctc=75.540, loss_att=56.684, acc=0.718, loss=62.341, backward_time=0.758, grad_norm=87.590, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.946e-05, train_time=2.060
+[gpua003:0/64] 2023-07-07 04:43:22,167 (trainer:732) INFO: 21epoch:train:8001-8100batch: iter_time=1.011e-04, forward_time=0.149, loss_ctc=71.159, loss_att=52.801, acc=0.693, loss=58.309, backward_time=0.770, grad_norm=152.348, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.116, optim0_lr0=7.944e-05, train_time=2.130
+[gpua003:0/64] 2023-07-07 04:45:05,978 (trainer:732) INFO: 21epoch:train:8101-8200batch: iter_time=9.418e-05, forward_time=0.140, loss_ctc=76.922, loss_att=55.656, acc=0.697, loss=62.036, backward_time=0.756, grad_norm=115.315, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.114, optim0_lr0=7.942e-05, train_time=2.076
+[gpua003:0/64] 2023-07-07 04:46:48,173 (trainer:732) INFO: 21epoch:train:8201-8300batch: iter_time=1.139e-04, forward_time=0.112, loss_ctc=71.045, loss_att=50.537, acc=0.700, loss=56.690, backward_time=0.751, grad_norm=97.377, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.940e-05, train_time=2.044
+[gpua003:0/64] 2023-07-07 04:47:40,909 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-07 04:48:00,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 04:48:04,000 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 04:48:04,001 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-07 04:48:04,007 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 04:53:34,303 (trainer:732) INFO: 21epoch:train:8301-8400batch: iter_time=2.928, forward_time=0.156, loss_ctc=74.000, loss_att=62.198, acc=0.690, loss=65.739, backward_time=0.775, grad_norm=93.122, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.938e-05, train_time=8.122
+[gpua003:0/64] 2023-07-07 04:55:17,196 (trainer:732) INFO: 21epoch:train:8401-8500batch: iter_time=1.028e-04, forward_time=0.111, loss_ctc=65.302, loss_att=52.812, acc=0.700, loss=56.559, backward_time=0.756, grad_norm=78.595, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.936e-05, train_time=2.058
+[gpua003:0/64] 2023-07-07 04:56:57,424 (trainer:732) INFO: 21epoch:train:8501-8600batch: iter_time=9.658e-05, forward_time=0.108, loss_ctc=68.684, loss_att=51.115, acc=0.721, loss=56.385, backward_time=0.750, grad_norm=82.716, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.934e-05, train_time=2.004
+[gpua003:0/64] 2023-07-07 04:58:38,276 (trainer:732) INFO: 21epoch:train:8601-8700batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=69.325, loss_att=51.087, acc=0.707, loss=56.559, backward_time=0.752, grad_norm=99.216, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.932e-05, train_time=2.017
+[gpua003:0/64] 2023-07-07 05:00:18,316 (trainer:732) INFO: 21epoch:train:8701-8800batch: iter_time=9.697e-05, forward_time=0.109, loss_ctc=73.945, loss_att=57.455, acc=0.694, loss=62.402, backward_time=0.753, grad_norm=89.756, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.930e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 05:01:58,070 (trainer:732) INFO: 21epoch:train:8801-8900batch: iter_time=1.013e-04, forward_time=0.109, loss_ctc=70.106, loss_att=52.451, acc=0.702, loss=57.747, backward_time=0.753, grad_norm=85.297, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.114, optim0_lr0=7.928e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 05:03:37,866 (trainer:732) INFO: 21epoch:train:8901-9000batch: iter_time=9.608e-05, forward_time=0.108, loss_ctc=76.088, loss_att=58.045, acc=0.688, loss=63.458, backward_time=0.752, grad_norm=106.493, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.926e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 05:05:17,660 (trainer:732) INFO: 21epoch:train:9001-9100batch: iter_time=9.909e-05, forward_time=0.108, loss_ctc=80.044, loss_att=57.867, acc=0.695, loss=64.520, backward_time=0.752, grad_norm=118.907, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.924e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 05:06:43,861 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-07 05:07:03,332 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 05:07:07,163 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 05:07:07,164 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-07 05:07:07,170 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 05:11:58,302 (trainer:732) INFO: 21epoch:train:9101-9200batch: iter_time=1.887, forward_time=0.165, loss_ctc=73.392, loss_att=56.440, acc=0.687, loss=61.525, backward_time=0.766, grad_norm=116.633, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.115, optim0_lr0=7.922e-05, train_time=8.012
+[gpua003:0/64] 2023-07-07 05:13:38,749 (trainer:732) INFO: 21epoch:train:9201-9300batch: iter_time=9.300e-05, forward_time=0.107, loss_ctc=67.815, loss_att=50.596, acc=0.716, loss=55.762, backward_time=0.753, grad_norm=79.997, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.920e-05, train_time=2.009
+[gpua003:0/64] 2023-07-07 05:15:19,209 (trainer:732) INFO: 21epoch:train:9301-9400batch: iter_time=1.019e-04, forward_time=0.107, loss_ctc=66.582, loss_att=52.925, acc=0.708, loss=57.022, backward_time=0.752, grad_norm=93.157, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.919e-05, train_time=2.009
+[gpua003:0/64] 2023-07-07 05:16:59,576 (trainer:732) INFO: 21epoch:train:9401-9500batch: iter_time=9.374e-05, forward_time=0.107, loss_ctc=61.813, loss_att=45.211, acc=0.710, loss=50.192, backward_time=0.752, grad_norm=86.096, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.917e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 05:18:40,211 (trainer:732) INFO: 21epoch:train:9501-9600batch: iter_time=9.173e-05, forward_time=0.107, loss_ctc=81.797, loss_att=61.241, acc=0.697, loss=67.408, backward_time=0.752, grad_norm=95.878, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.111, optim0_lr0=7.915e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 05:20:20,733 (trainer:732) INFO: 21epoch:train:9601-9700batch: iter_time=9.476e-05, forward_time=0.107, loss_ctc=72.431, loss_att=53.032, acc=0.704, loss=58.852, backward_time=0.752, grad_norm=86.451, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.913e-05, train_time=2.010
+[gpua003:0/64] 2023-07-07 05:22:00,233 (trainer:732) INFO: 21epoch:train:9701-9800batch: iter_time=9.436e-05, forward_time=0.107, loss_ctc=72.639, loss_att=53.426, acc=0.691, loss=59.190, backward_time=0.751, grad_norm=95.931, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.911e-05, train_time=1.990
+[gpua003:0/64] 2023-07-07 05:23:39,817 (trainer:732) INFO: 21epoch:train:9801-9900batch: iter_time=8.872e-05, forward_time=0.108, loss_ctc=76.251, loss_att=59.146, acc=0.691, loss=64.277, backward_time=0.752, grad_norm=114.109, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.909e-05, train_time=1.991
+[gpua003:0/64] 2023-07-07 05:25:19,445 (trainer:732) INFO: 21epoch:train:9901-10000batch: iter_time=9.037e-05, forward_time=0.107, loss_ctc=73.224, loss_att=50.857, acc=0.694, loss=57.567, backward_time=0.752, grad_norm=105.676, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.907e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 05:38:40,911 (trainer:338) INFO: 21epoch results: [train] iter_time=0.235, forward_time=0.113, loss_ctc=72.893, loss_att=54.497, acc=0.699, loss=60.016, backward_time=0.755, grad_norm=97.715, clip=100.000, loss_scale=2.398e+19, optim_step_time=0.113, optim0_lr0=8.007e-05, train_time=2.656, time=3 hours, 41 minutes and 44.21 seconds, total_count=180000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.186, cer_ctc=0.290, loss_att=41.004, acc=0.654, cer=0.409, wer=0.994, loss=43.759, time=6 minutes and 51.3 seconds, total_count=18722, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 7.7 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-07 05:38:59,977 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-07 05:39:00,026 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/14epoch.pth
+[gpua003:0/64] 2023-07-07 05:39:00,083 (trainer:272) INFO: 22/100epoch started. Estimated time to finish: 1 week, 5 days and 17 hours
+[gpua003:0/64] 2023-07-07 05:39:01,612 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-07 05:39:20,669 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 05:39:24,275 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 05:39:24,275 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-07 05:39:24,379 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 05:46:09,882 (trainer:732) INFO: 22epoch:train:1-100batch: iter_time=3.232, forward_time=0.130, loss_ctc=75.959, loss_att=57.589, acc=0.700, loss=63.100, backward_time=0.770, grad_norm=94.575, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.905e-05, train_time=8.584
+[gpua003:0/64] 2023-07-07 05:47:51,050 (trainer:732) INFO: 22epoch:train:101-200batch: iter_time=9.855e-05, forward_time=0.109, loss_ctc=68.208, loss_att=52.446, acc=0.682, loss=57.174, backward_time=0.756, grad_norm=97.364, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.903e-05, train_time=2.023
+[gpua003:0/64] 2023-07-07 05:49:30,923 (trainer:732) INFO: 22epoch:train:201-300batch: iter_time=9.754e-05, forward_time=0.110, loss_ctc=76.840, loss_att=59.608, acc=0.719, loss=64.777, backward_time=0.753, grad_norm=95.370, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.901e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 05:51:10,776 (trainer:732) INFO: 22epoch:train:301-400batch: iter_time=9.949e-05, forward_time=0.110, loss_ctc=77.024, loss_att=65.093, acc=0.681, loss=68.673, backward_time=0.752, grad_norm=106.966, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.899e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 05:52:55,638 (trainer:732) INFO: 22epoch:train:401-500batch: iter_time=9.878e-05, forward_time=0.109, loss_ctc=70.205, loss_att=56.739, acc=0.696, loss=60.779, backward_time=0.761, grad_norm=90.649, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.897e-05, train_time=2.097
+[gpua003:0/64] 2023-07-07 05:54:39,214 (trainer:732) INFO: 22epoch:train:501-600batch: iter_time=1.011e-04, forward_time=0.109, loss_ctc=67.033, loss_att=51.629, acc=0.698, loss=56.250, backward_time=0.754, grad_norm=93.014, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.895e-05, train_time=2.071
+[gpua003:0/64] 2023-07-07 05:56:20,364 (trainer:732) INFO: 22epoch:train:601-700batch: iter_time=1.016e-04, forward_time=0.109, loss_ctc=65.903, loss_att=46.926, acc=0.685, loss=52.619, backward_time=0.751, grad_norm=81.828, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.893e-05, train_time=2.023
+[gpua003:0/64] 2023-07-07 05:58:00,763 (trainer:732) INFO: 22epoch:train:701-800batch: iter_time=9.919e-05, forward_time=0.108, loss_ctc=77.598, loss_att=57.244, acc=0.698, loss=63.350, backward_time=0.751, grad_norm=97.156, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.891e-05, train_time=2.008
+[gpua003:0/64] 2023-07-07 05:58:40,682 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-07 05:58:59,667 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 05:59:03,212 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 05:59:03,212 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-07 05:59:03,218 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 06:03:49,036 (trainer:732) INFO: 22epoch:train:801-900batch: iter_time=1.356, forward_time=0.139, loss_ctc=73.151, loss_att=56.666, acc=0.700, loss=61.612, backward_time=0.769, grad_norm=98.640, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.889e-05, train_time=6.965
+[gpua003:0/64] 2023-07-07 06:05:29,630 (trainer:732) INFO: 22epoch:train:901-1000batch: iter_time=9.770e-05, forward_time=0.110, loss_ctc=68.300, loss_att=50.098, acc=0.687, loss=55.559, backward_time=0.754, grad_norm=100.634, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.887e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 06:07:09,505 (trainer:732) INFO: 22epoch:train:1001-1100batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=73.251, loss_att=59.729, acc=0.702, loss=63.786, backward_time=0.752, grad_norm=99.591, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.885e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 06:08:52,104 (trainer:732) INFO: 22epoch:train:1101-1200batch: iter_time=1.038e-04, forward_time=0.109, loss_ctc=71.657, loss_att=55.743, acc=0.704, loss=60.517, backward_time=0.765, grad_norm=85.238, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.883e-05, train_time=2.052
+[gpua003:0/64] 2023-07-07 06:10:32,190 (trainer:732) INFO: 22epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.109, loss_ctc=76.079, loss_att=65.762, acc=0.682, loss=68.857, backward_time=0.753, grad_norm=102.836, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.881e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 06:12:12,065 (trainer:732) INFO: 22epoch:train:1301-1400batch: iter_time=9.626e-05, forward_time=0.109, loss_ctc=69.609, loss_att=55.901, acc=0.688, loss=60.014, backward_time=0.753, grad_norm=115.398, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.879e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 06:13:52,048 (trainer:732) INFO: 22epoch:train:1401-1500batch: iter_time=8.880e-05, forward_time=0.109, loss_ctc=61.548, loss_att=44.239, acc=0.684, loss=49.431, backward_time=0.753, grad_norm=108.769, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.877e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 06:15:31,817 (trainer:732) INFO: 22epoch:train:1501-1600batch: iter_time=9.085e-05, forward_time=0.109, loss_ctc=73.610, loss_att=51.494, acc=0.698, loss=58.129, backward_time=0.753, grad_norm=95.261, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.875e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 06:16:54,392 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-07 06:17:13,867 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 06:17:17,499 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 06:17:17,499 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-07 06:17:17,506 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 06:21:41,543 (trainer:732) INFO: 22epoch:train:1601-1700batch: iter_time=2.544, forward_time=0.118, loss_ctc=71.770, loss_att=56.335, acc=0.704, loss=60.965, backward_time=0.763, grad_norm=88.321, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.873e-05, train_time=7.394
+[gpua003:0/64] 2023-07-07 06:23:24,648 (trainer:732) INFO: 22epoch:train:1701-1800batch: iter_time=8.832e-05, forward_time=0.108, loss_ctc=72.678, loss_att=52.151, acc=0.698, loss=58.309, backward_time=0.758, grad_norm=95.685, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.871e-05, train_time=2.062
+[gpua003:0/64] 2023-07-07 06:25:06,463 (trainer:732) INFO: 22epoch:train:1801-1900batch: iter_time=9.287e-05, forward_time=0.122, loss_ctc=66.852, loss_att=52.819, acc=0.716, loss=57.029, backward_time=0.755, grad_norm=93.289, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.869e-05, train_time=2.036
+[gpua003:0/64] 2023-07-07 06:26:47,146 (trainer:732) INFO: 22epoch:train:1901-2000batch: iter_time=1.001e-04, forward_time=0.114, loss_ctc=77.777, loss_att=64.279, acc=0.708, loss=68.329, backward_time=0.754, grad_norm=107.242, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.867e-05, train_time=2.013
+[gpua003:0/64] 2023-07-07 06:28:27,163 (trainer:732) INFO: 22epoch:train:2001-2100batch: iter_time=9.088e-05, forward_time=0.109, loss_ctc=75.746, loss_att=62.285, acc=0.691, loss=66.323, backward_time=0.754, grad_norm=93.071, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.865e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 06:30:09,221 (trainer:732) INFO: 22epoch:train:2101-2200batch: iter_time=8.882e-05, forward_time=0.123, loss_ctc=70.706, loss_att=58.251, acc=0.693, loss=61.987, backward_time=0.758, grad_norm=99.862, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.863e-05, train_time=2.041
+[gpua003:0/64] 2023-07-07 06:31:49,013 (trainer:732) INFO: 22epoch:train:2201-2300batch: iter_time=8.759e-05, forward_time=0.108, loss_ctc=56.761, loss_att=45.017, acc=0.696, loss=48.540, backward_time=0.752, grad_norm=80.159, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.862e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 06:33:28,852 (trainer:732) INFO: 22epoch:train:2301-2400batch: iter_time=9.062e-05, forward_time=0.108, loss_ctc=69.849, loss_att=49.995, acc=0.703, loss=55.951, backward_time=0.751, grad_norm=112.615, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.860e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 06:35:09,707 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-07 06:35:28,850 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 06:35:32,446 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 06:35:32,446 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-07 06:35:32,453 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 06:39:06,065 (trainer:732) INFO: 22epoch:train:2401-2500batch: iter_time=1.292, forward_time=0.109, loss_ctc=71.798, loss_att=50.317, acc=0.717, loss=56.761, backward_time=0.759, grad_norm=95.019, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.858e-05, train_time=6.744
+[gpua003:0/64] 2023-07-07 06:40:51,328 (trainer:732) INFO: 22epoch:train:2501-2600batch: iter_time=1.031e-04, forward_time=0.109, loss_ctc=75.859, loss_att=56.028, acc=0.710, loss=61.978, backward_time=0.760, grad_norm=95.952, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.856e-05, train_time=2.105
+[gpua003:0/64] 2023-07-07 06:42:31,354 (trainer:732) INFO: 22epoch:train:2601-2700batch: iter_time=1.082e-04, forward_time=0.108, loss_ctc=66.166, loss_att=50.393, acc=0.695, loss=55.125, backward_time=0.751, grad_norm=86.983, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.854e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 06:44:10,962 (trainer:732) INFO: 22epoch:train:2701-2800batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=77.042, loss_att=58.748, acc=0.721, loss=64.236, backward_time=0.750, grad_norm=99.652, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.852e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 06:45:50,837 (trainer:732) INFO: 22epoch:train:2801-2900batch: iter_time=1.088e-04, forward_time=0.109, loss_ctc=73.184, loss_att=61.853, acc=0.694, loss=65.253, backward_time=0.752, grad_norm=101.544, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.850e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 06:47:30,553 (trainer:732) INFO: 22epoch:train:2901-3000batch: iter_time=1.154e-04, forward_time=0.109, loss_ctc=70.952, loss_att=57.515, acc=0.703, loss=61.546, backward_time=0.751, grad_norm=97.436, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.848e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 06:49:10,499 (trainer:732) INFO: 22epoch:train:3001-3100batch: iter_time=1.189e-04, forward_time=0.109, loss_ctc=62.039, loss_att=47.304, acc=0.712, loss=51.724, backward_time=0.753, grad_norm=90.810, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.846e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 06:50:50,138 (trainer:732) INFO: 22epoch:train:3101-3200batch: iter_time=1.267e-04, forward_time=0.109, loss_ctc=66.663, loss_att=46.048, acc=0.693, loss=52.232, backward_time=0.750, grad_norm=86.756, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.844e-05, train_time=1.993
+[gpua003:0/64] 2023-07-07 06:52:32,525 (trainer:732) INFO: 22epoch:train:3201-3300batch: iter_time=1.227e-04, forward_time=0.110, loss_ctc=72.678, loss_att=54.618, acc=0.711, loss=60.036, backward_time=0.757, grad_norm=104.682, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.842e-05, train_time=2.048
+[gpua003:0/64] 2023-07-07 06:53:06,741 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-07 06:53:26,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 06:53:29,609 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 06:53:29,609 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-07 06:53:30,022 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 06:57:58,983 (trainer:732) INFO: 22epoch:train:3301-3400batch: iter_time=1.348, forward_time=0.145, loss_ctc=72.094, loss_att=56.366, acc=0.703, loss=61.085, backward_time=0.769, grad_norm=89.951, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.840e-05, train_time=6.529
+[gpua003:0/64] 2023-07-07 06:59:39,298 (trainer:732) INFO: 22epoch:train:3401-3500batch: iter_time=1.038e-04, forward_time=0.110, loss_ctc=66.447, loss_att=49.854, acc=0.686, loss=54.832, backward_time=0.753, grad_norm=93.159, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.838e-05, train_time=2.006
+[gpua003:0/64] 2023-07-07 07:01:19,264 (trainer:732) INFO: 22epoch:train:3501-3600batch: iter_time=9.217e-05, forward_time=0.109, loss_ctc=75.507, loss_att=61.068, acc=0.704, loss=65.400, backward_time=0.751, grad_norm=96.972, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.836e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 07:02:59,198 (trainer:732) INFO: 22epoch:train:3601-3700batch: iter_time=1.088e-04, forward_time=0.109, loss_ctc=70.999, loss_att=57.684, acc=0.705, loss=61.678, backward_time=0.752, grad_norm=85.253, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.834e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 07:04:38,930 (trainer:732) INFO: 22epoch:train:3701-3800batch: iter_time=1.136e-04, forward_time=0.109, loss_ctc=72.958, loss_att=61.007, acc=0.688, loss=64.592, backward_time=0.750, grad_norm=110.731, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.833e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 07:06:18,662 (trainer:732) INFO: 22epoch:train:3801-3900batch: iter_time=1.150e-04, forward_time=0.109, loss_ctc=69.973, loss_att=57.379, acc=0.688, loss=61.157, backward_time=0.751, grad_norm=104.773, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.831e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 07:07:58,470 (trainer:732) INFO: 22epoch:train:3901-4000batch: iter_time=1.153e-04, forward_time=0.109, loss_ctc=58.578, loss_att=42.394, acc=0.697, loss=47.249, backward_time=0.752, grad_norm=78.336, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.829e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 07:09:38,304 (trainer:732) INFO: 22epoch:train:4001-4100batch: iter_time=9.816e-05, forward_time=0.108, loss_ctc=72.183, loss_att=51.473, acc=0.699, loss=57.686, backward_time=0.753, grad_norm=90.776, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.827e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 07:10:44,632 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-07 07:11:04,150 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 07:11:07,729 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 07:11:07,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-07 07:11:07,735 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 07:14:13,080 (trainer:732) INFO: 22epoch:train:4101-4200batch: iter_time=1.318, forward_time=0.108, loss_ctc=71.941, loss_att=54.810, acc=0.713, loss=59.949, backward_time=0.764, grad_norm=86.300, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.825e-05, train_time=5.495
+[gpua003:0/64] 2023-07-07 07:15:54,008 (trainer:732) INFO: 22epoch:train:4201-4300batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=70.358, loss_att=50.448, acc=0.689, loss=56.421, backward_time=0.755, grad_norm=99.875, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.823e-05, train_time=2.018
+[gpua003:0/64] 2023-07-07 07:17:35,127 (trainer:732) INFO: 22epoch:train:4301-4400batch: iter_time=1.074e-04, forward_time=0.108, loss_ctc=70.078, loss_att=57.577, acc=0.697, loss=61.327, backward_time=0.751, grad_norm=93.447, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.821e-05, train_time=2.022
+[gpua003:0/64] 2023-07-07 07:19:17,039 (trainer:732) INFO: 22epoch:train:4401-4500batch: iter_time=8.866e-05, forward_time=0.108, loss_ctc=73.720, loss_att=60.764, acc=0.703, loss=64.650, backward_time=0.759, grad_norm=98.011, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.819e-05, train_time=2.038
+[gpua003:0/64] 2023-07-07 07:20:57,799 (trainer:732) INFO: 22epoch:train:4501-4600batch: iter_time=9.315e-05, forward_time=0.110, loss_ctc=74.002, loss_att=60.934, acc=0.685, loss=64.855, backward_time=0.754, grad_norm=99.614, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.817e-05, train_time=2.015
+[gpua003:0/64] 2023-07-07 07:22:39,059 (trainer:732) INFO: 22epoch:train:4601-4700batch: iter_time=1.059e-04, forward_time=0.109, loss_ctc=69.200, loss_att=56.517, acc=0.691, loss=60.322, backward_time=0.752, grad_norm=101.222, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.815e-05, train_time=2.025
+[gpua003:0/64] 2023-07-07 07:24:20,772 (trainer:732) INFO: 22epoch:train:4701-4800batch: iter_time=1.095e-04, forward_time=0.109, loss_ctc=61.538, loss_att=45.833, acc=0.689, loss=50.545, backward_time=0.754, grad_norm=95.781, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.813e-05, train_time=2.034
+[gpua003:0/64] 2023-07-07 07:26:02,965 (trainer:732) INFO: 22epoch:train:4801-4900batch: iter_time=1.033e-04, forward_time=0.109, loss_ctc=69.082, loss_att=51.514, acc=0.696, loss=56.785, backward_time=0.752, grad_norm=95.683, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.811e-05, train_time=2.044
+[gpua003:0/64] 2023-07-07 07:27:49,767 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-07 07:28:09,330 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 07:28:12,926 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 07:28:12,926 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-07 07:28:12,933 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 07:31:57,955 (trainer:732) INFO: 22epoch:train:4901-5000batch: iter_time=2.503, forward_time=0.127, loss_ctc=69.914, loss_att=52.926, acc=0.705, loss=58.022, backward_time=0.762, grad_norm=90.567, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.810e-05, train_time=7.100
+[gpua003:0/64] 2023-07-07 07:33:40,006 (trainer:732) INFO: 22epoch:train:5001-5100batch: iter_time=1.018e-04, forward_time=0.109, loss_ctc=75.894, loss_att=56.058, acc=0.706, loss=62.009, backward_time=0.760, grad_norm=95.605, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.808e-05, train_time=2.041
+[gpua003:0/64] 2023-07-07 07:35:20,731 (trainer:732) INFO: 22epoch:train:5101-5200batch: iter_time=1.041e-04, forward_time=0.108, loss_ctc=65.260, loss_att=50.518, acc=0.693, loss=54.941, backward_time=0.754, grad_norm=95.597, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.806e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 07:37:03,337 (trainer:732) INFO: 22epoch:train:5201-5300batch: iter_time=9.542e-05, forward_time=0.109, loss_ctc=75.495, loss_att=58.850, acc=0.712, loss=63.844, backward_time=0.755, grad_norm=96.146, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.804e-05, train_time=2.052
+[gpua003:0/64] 2023-07-07 07:38:52,102 (trainer:732) INFO: 22epoch:train:5301-5400batch: iter_time=9.699e-05, forward_time=0.109, loss_ctc=72.248, loss_att=62.576, acc=0.684, loss=65.477, backward_time=0.759, grad_norm=109.912, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.802e-05, train_time=2.175
+[gpua003:0/64] 2023-07-07 07:40:47,676 (trainer:732) INFO: 22epoch:train:5401-5500batch: iter_time=9.981e-05, forward_time=0.108, loss_ctc=69.974, loss_att=56.541, acc=0.690, loss=60.571, backward_time=0.802, grad_norm=96.604, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.800e-05, train_time=2.311
+[gpua003:0/64] 2023-07-07 07:42:29,427 (trainer:732) INFO: 22epoch:train:5501-5600batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=62.758, loss_att=47.686, acc=0.702, loss=52.208, backward_time=0.762, grad_norm=88.346, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.798e-05, train_time=2.035
+[gpua003:0/64] 2023-07-07 07:44:09,193 (trainer:732) INFO: 22epoch:train:5601-5700batch: iter_time=1.001e-04, forward_time=0.109, loss_ctc=65.100, loss_att=45.891, acc=0.696, loss=51.654, backward_time=0.751, grad_norm=81.829, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.796e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 07:45:53,335 (trainer:732) INFO: 22epoch:train:5701-5800batch: iter_time=9.999e-05, forward_time=0.108, loss_ctc=74.308, loss_att=57.186, acc=0.695, loss=62.323, backward_time=0.767, grad_norm=150.463, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.794e-05, train_time=2.083
+[gpua003:0/64] 2023-07-07 07:46:33,767 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-07 07:46:52,851 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 07:46:56,470 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 07:46:56,470 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-07 07:46:56,477 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 07:50:29,089 (trainer:732) INFO: 22epoch:train:5801-5900batch: iter_time=1.584, forward_time=0.131, loss_ctc=74.207, loss_att=54.322, acc=0.717, loss=60.288, backward_time=0.764, grad_norm=91.726, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.792e-05, train_time=5.515
+[gpua003:0/64] 2023-07-07 07:52:09,734 (trainer:732) INFO: 22epoch:train:5901-6000batch: iter_time=9.734e-05, forward_time=0.110, loss_ctc=65.678, loss_att=47.794, acc=0.691, loss=53.160, backward_time=0.752, grad_norm=83.021, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.791e-05, train_time=2.013
+[gpua003:0/64] 2023-07-07 07:53:50,393 (trainer:732) INFO: 22epoch:train:6001-6100batch: iter_time=9.193e-05, forward_time=0.112, loss_ctc=74.747, loss_att=60.495, acc=0.721, loss=64.771, backward_time=0.755, grad_norm=90.014, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.789e-05, train_time=2.013
+[gpua003:0/64] 2023-07-07 07:55:31,643 (trainer:732) INFO: 22epoch:train:6101-6200batch: iter_time=9.239e-05, forward_time=0.119, loss_ctc=73.282, loss_att=60.349, acc=0.698, loss=64.229, backward_time=0.756, grad_norm=88.930, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.787e-05, train_time=2.025
+[gpua003:0/64] 2023-07-07 07:57:11,271 (trainer:732) INFO: 22epoch:train:6201-6300batch: iter_time=9.293e-05, forward_time=0.108, loss_ctc=67.412, loss_att=54.664, acc=0.707, loss=58.488, backward_time=0.751, grad_norm=95.356, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.785e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 07:58:51,149 (trainer:732) INFO: 22epoch:train:6301-6400batch: iter_time=9.666e-05, forward_time=0.109, loss_ctc=66.473, loss_att=51.695, acc=0.714, loss=56.129, backward_time=0.751, grad_norm=103.423, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.783e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 08:00:31,053 (trainer:732) INFO: 22epoch:train:6401-6500batch: iter_time=9.940e-05, forward_time=0.110, loss_ctc=61.632, loss_att=44.108, acc=0.695, loss=49.365, backward_time=0.752, grad_norm=86.270, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.781e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 08:02:10,665 (trainer:732) INFO: 22epoch:train:6501-6600batch: iter_time=1.027e-04, forward_time=0.109, loss_ctc=72.139, loss_att=51.933, acc=0.712, loss=57.995, backward_time=0.750, grad_norm=96.771, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.779e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 08:03:18,465 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-07 08:03:37,874 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 08:03:41,509 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 08:03:41,509 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-07 08:03:41,515 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 08:07:00,984 (trainer:732) INFO: 22epoch:train:6601-6700batch: iter_time=1.390, forward_time=0.109, loss_ctc=70.332, loss_att=55.203, acc=0.706, loss=59.742, backward_time=0.765, grad_norm=97.451, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.777e-05, train_time=5.806
+[gpua003:0/64] 2023-07-07 08:08:45,065 (trainer:732) INFO: 22epoch:train:6701-6800batch: iter_time=9.209e-05, forward_time=0.108, loss_ctc=72.039, loss_att=50.836, acc=0.705, loss=57.197, backward_time=0.762, grad_norm=96.664, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.776e-05, train_time=2.081
+[gpua003:0/64] 2023-07-07 08:10:27,886 (trainer:732) INFO: 22epoch:train:6801-6900batch: iter_time=9.907e-05, forward_time=0.109, loss_ctc=66.153, loss_att=54.046, acc=0.709, loss=57.678, backward_time=0.754, grad_norm=91.951, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.774e-05, train_time=2.056
+[gpua003:0/64] 2023-07-07 08:12:07,728 (trainer:732) INFO: 22epoch:train:6901-7000batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=76.287, loss_att=59.045, acc=0.713, loss=64.218, backward_time=0.752, grad_norm=110.971, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.772e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 08:13:52,415 (trainer:732) INFO: 22epoch:train:7001-7100batch: iter_time=9.646e-05, forward_time=0.110, loss_ctc=72.777, loss_att=62.041, acc=0.682, loss=65.261, backward_time=0.770, grad_norm=98.939, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.770e-05, train_time=2.094
+[gpua003:0/64] 2023-07-07 08:15:37,878 (trainer:732) INFO: 22epoch:train:7101-7200batch: iter_time=9.875e-05, forward_time=0.109, loss_ctc=69.184, loss_att=56.850, acc=0.686, loss=60.550, backward_time=0.767, grad_norm=104.102, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.768e-05, train_time=2.109
+[gpua003:0/64] 2023-07-07 08:17:18,027 (trainer:732) INFO: 22epoch:train:7201-7300batch: iter_time=9.466e-05, forward_time=0.108, loss_ctc=56.723, loss_att=44.561, acc=0.694, loss=48.210, backward_time=0.751, grad_norm=82.134, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.766e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 08:19:11,959 (trainer:732) INFO: 22epoch:train:7301-7400batch: iter_time=9.554e-05, forward_time=0.109, loss_ctc=68.916, loss_att=48.970, acc=0.708, loss=54.954, backward_time=0.805, grad_norm=145.408, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.764e-05, train_time=2.278
+[gpua003:0/64] 2023-07-07 08:20:53,519 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua003:0/64] 2023-07-07 08:21:12,743 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 08:21:16,313 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 08:21:16,313 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-07 08:21:16,320 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 08:23:45,003 (trainer:732) INFO: 22epoch:train:7401-7500batch: iter_time=1.329, forward_time=0.136, loss_ctc=70.359, loss_att=50.850, acc=0.717, loss=56.703, backward_time=0.762, grad_norm=84.034, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.116, optim0_lr0=7.762e-05, train_time=5.461
+[gpua003:0/64] 2023-07-07 08:25:27,825 (trainer:732) INFO: 22epoch:train:7501-7600batch: iter_time=8.708e-05, forward_time=0.110, loss_ctc=76.765, loss_att=54.087, acc=0.711, loss=60.891, backward_time=0.763, grad_norm=98.850, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.761e-05, train_time=2.056
+[gpua003:0/64] 2023-07-07 08:27:08,334 (trainer:732) INFO: 22epoch:train:7601-7700batch: iter_time=1.042e-04, forward_time=0.109, loss_ctc=64.296, loss_att=50.883, acc=0.706, loss=54.907, backward_time=0.754, grad_norm=87.185, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.759e-05, train_time=2.010
+[gpua003:0/64] 2023-07-07 08:28:48,255 (trainer:732) INFO: 22epoch:train:7701-7800batch: iter_time=8.846e-05, forward_time=0.108, loss_ctc=74.565, loss_att=60.399, acc=0.719, loss=64.649, backward_time=0.752, grad_norm=95.857, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.757e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 08:30:28,045 (trainer:732) INFO: 22epoch:train:7801-7900batch: iter_time=1.037e-04, forward_time=0.109, loss_ctc=73.972, loss_att=62.893, acc=0.689, loss=66.217, backward_time=0.752, grad_norm=87.398, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.755e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 08:32:07,809 (trainer:732) INFO: 22epoch:train:7901-8000batch: iter_time=1.089e-04, forward_time=0.110, loss_ctc=70.642, loss_att=57.664, acc=0.693, loss=61.557, backward_time=0.752, grad_norm=92.959, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.753e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 08:33:47,492 (trainer:732) INFO: 22epoch:train:8001-8100batch: iter_time=1.063e-04, forward_time=0.109, loss_ctc=55.497, loss_att=44.132, acc=0.708, loss=47.541, backward_time=0.752, grad_norm=80.785, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.751e-05, train_time=1.993
+[gpua003:0/64] 2023-07-07 08:35:27,353 (trainer:732) INFO: 22epoch:train:8101-8200batch: iter_time=1.061e-04, forward_time=0.111, loss_ctc=67.689, loss_att=46.225, acc=0.704, loss=52.664, backward_time=0.752, grad_norm=103.286, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.749e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 08:37:07,180 (trainer:732) INFO: 22epoch:train:8201-8300batch: iter_time=1.080e-04, forward_time=0.111, loss_ctc=72.512, loss_att=53.632, acc=0.717, loss=59.296, backward_time=0.752, grad_norm=82.921, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.747e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 08:37:49,047 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-07 08:38:08,430 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 08:38:12,343 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 08:38:12,343 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-07 08:38:12,349 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 08:43:42,647 (trainer:732) INFO: 22epoch:train:8301-8400batch: iter_time=2.865, forward_time=0.127, loss_ctc=76.998, loss_att=56.248, acc=0.715, loss=62.473, backward_time=0.764, grad_norm=91.921, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.746e-05, train_time=7.909
+[gpua003:0/64] 2023-07-07 08:45:23,857 (trainer:732) INFO: 22epoch:train:8401-8500batch: iter_time=9.852e-05, forward_time=0.109, loss_ctc=64.243, loss_att=47.232, acc=0.693, loss=52.335, backward_time=0.754, grad_norm=77.637, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.744e-05, train_time=2.024
+[gpua003:0/64] 2023-07-07 08:47:04,339 (trainer:732) INFO: 22epoch:train:8501-8600batch: iter_time=1.040e-04, forward_time=0.109, loss_ctc=74.869, loss_att=61.475, acc=0.714, loss=65.493, backward_time=0.753, grad_norm=93.438, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.742e-05, train_time=2.009
+[gpua003:0/64] 2023-07-07 08:48:43,954 (trainer:732) INFO: 22epoch:train:8601-8700batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=73.122, loss_att=61.953, acc=0.688, loss=65.304, backward_time=0.751, grad_norm=96.667, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.740e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 08:50:26,881 (trainer:732) INFO: 22epoch:train:8701-8800batch: iter_time=1.126e-04, forward_time=0.108, loss_ctc=66.145, loss_att=55.031, acc=0.694, loss=58.365, backward_time=0.754, grad_norm=94.273, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.738e-05, train_time=2.058
+[gpua003:0/64] 2023-07-07 08:52:06,631 (trainer:732) INFO: 22epoch:train:8801-8900batch: iter_time=1.047e-04, forward_time=0.108, loss_ctc=66.295, loss_att=51.963, acc=0.699, loss=56.262, backward_time=0.751, grad_norm=94.275, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.736e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 08:53:46,418 (trainer:732) INFO: 22epoch:train:8901-9000batch: iter_time=9.829e-05, forward_time=0.108, loss_ctc=61.909, loss_att=43.782, acc=0.697, loss=49.220, backward_time=0.752, grad_norm=82.673, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.734e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 08:55:26,132 (trainer:732) INFO: 22epoch:train:9001-9100batch: iter_time=1.043e-04, forward_time=0.108, loss_ctc=71.915, loss_att=53.753, acc=0.707, loss=59.202, backward_time=0.751, grad_norm=109.132, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.733e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 08:56:34,455 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-07 08:56:53,892 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 08:56:57,546 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 08:56:57,546 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-07 08:56:57,553 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 09:00:31,061 (trainer:732) INFO: 22epoch:train:9101-9200batch: iter_time=1.431, forward_time=0.127, loss_ctc=71.039, loss_att=54.915, acc=0.705, loss=59.752, backward_time=0.761, grad_norm=106.663, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.731e-05, train_time=6.098
+[gpua003:0/64] 2023-07-07 09:02:14,178 (trainer:732) INFO: 22epoch:train:9201-9300batch: iter_time=1.042e-04, forward_time=0.122, loss_ctc=72.597, loss_att=52.275, acc=0.703, loss=58.372, backward_time=0.760, grad_norm=105.215, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.115, optim0_lr0=7.729e-05, train_time=2.062
+[gpua003:0/64] 2023-07-07 09:03:55,492 (trainer:732) INFO: 22epoch:train:9301-9400batch: iter_time=1.019e-04, forward_time=0.111, loss_ctc=66.483, loss_att=52.418, acc=0.717, loss=56.638, backward_time=0.755, grad_norm=102.064, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.727e-05, train_time=2.026
+[gpua003:0/64] 2023-07-07 09:05:35,862 (trainer:732) INFO: 22epoch:train:9401-9500batch: iter_time=1.021e-04, forward_time=0.110, loss_ctc=75.311, loss_att=58.987, acc=0.722, loss=63.884, backward_time=0.752, grad_norm=84.201, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.725e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 09:07:16,074 (trainer:732) INFO: 22epoch:train:9501-9600batch: iter_time=9.991e-05, forward_time=0.111, loss_ctc=71.239, loss_att=61.029, acc=0.693, loss=64.092, backward_time=0.754, grad_norm=112.479, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.723e-05, train_time=2.004
+[gpua003:0/64] 2023-07-07 09:08:55,956 (trainer:732) INFO: 22epoch:train:9601-9700batch: iter_time=1.011e-04, forward_time=0.111, loss_ctc=69.983, loss_att=56.365, acc=0.703, loss=60.450, backward_time=0.753, grad_norm=101.191, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.722e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 09:10:39,207 (trainer:732) INFO: 22epoch:train:9701-9800batch: iter_time=1.137e-04, forward_time=0.129, loss_ctc=55.743, loss_att=43.049, acc=0.703, loss=46.857, backward_time=0.756, grad_norm=90.349, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.124, optim0_lr0=7.720e-05, train_time=2.065
+[gpua003:0/64] 2023-07-07 09:12:19,264 (trainer:732) INFO: 22epoch:train:9801-9900batch: iter_time=9.062e-05, forward_time=0.110, loss_ctc=69.518, loss_att=49.134, acc=0.711, loss=55.249, backward_time=0.753, grad_norm=100.063, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.718e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 09:14:01,810 (trainer:732) INFO: 22epoch:train:9901-10000batch: iter_time=9.648e-05, forward_time=0.129, loss_ctc=69.398, loss_att=49.229, acc=0.721, loss=55.280, backward_time=0.756, grad_norm=90.245, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.716e-05, train_time=2.051
+[gpua003:0/64] 2023-07-07 09:26:51,892 (trainer:338) INFO: 22epoch results: [train] iter_time=0.222, forward_time=0.112, loss_ctc=70.052, loss_att=54.212, acc=0.701, loss=58.964, backward_time=0.757, grad_norm=96.262, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.809e-05, train_time=2.580, time=3 hours, 35 minutes and 15.27 seconds, total_count=190000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=53.056, cer_ctc=0.291, loss_att=42.969, acc=0.658, cer=0.388, wer=0.991, loss=45.995, time=6 minutes and 5.25 seconds, total_count=19734, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 31.11 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-07 09:27:11,470 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-07 09:27:11,478 (trainer:272) INFO: 23/100epoch started. Estimated time to finish: 1 week, 5 days and 13 hours
+[gpua003:0/64] 2023-07-07 09:27:12,495 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-07 09:27:32,952 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 09:27:36,862 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 09:27:36,865 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-07 09:27:36,955 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 09:34:04,979 (trainer:732) INFO: 23epoch:train:1-100batch: iter_time=3.070, forward_time=0.136, loss_ctc=73.179, loss_att=57.756, acc=0.704, loss=62.383, backward_time=0.766, grad_norm=99.480, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.115, optim0_lr0=7.714e-05, train_time=8.259
+[gpua003:0/64] 2023-07-07 09:35:47,590 (trainer:732) INFO: 23epoch:train:101-200batch: iter_time=9.809e-05, forward_time=0.110, loss_ctc=65.447, loss_att=55.548, acc=0.684, loss=58.518, backward_time=0.757, grad_norm=105.042, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.712e-05, train_time=2.052
+[gpua003:0/64] 2023-07-07 09:37:44,269 (trainer:732) INFO: 23epoch:train:201-300batch: iter_time=2.992e-04, forward_time=0.200, loss_ctc=91.477, loss_att=64.962, acc=0.703, loss=72.917, backward_time=0.765, grad_norm=137.116, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.130, optim0_lr0=7.711e-05, train_time=2.331
+[gpua003:0/64] 2023-07-07 09:39:30,373 (trainer:732) INFO: 23epoch:train:301-400batch: iter_time=2.111e-04, forward_time=0.143, loss_ctc=74.075, loss_att=60.821, acc=0.698, loss=64.797, backward_time=0.764, grad_norm=100.292, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.118, optim0_lr0=7.709e-05, train_time=2.124
+[gpua003:0/64] 2023-07-07 09:41:13,338 (trainer:732) INFO: 23epoch:train:401-500batch: iter_time=9.774e-05, forward_time=0.108, loss_ctc=78.934, loss_att=61.524, acc=0.709, loss=66.747, backward_time=0.755, grad_norm=111.165, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.707e-05, train_time=2.059
+[gpua003:0/64] 2023-07-07 09:42:53,514 (trainer:732) INFO: 23epoch:train:501-600batch: iter_time=9.807e-05, forward_time=0.108, loss_ctc=69.498, loss_att=53.920, acc=0.698, loss=58.594, backward_time=0.752, grad_norm=97.973, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.705e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 09:44:41,445 (trainer:732) INFO: 23epoch:train:601-700batch: iter_time=8.987e-05, forward_time=0.108, loss_ctc=83.563, loss_att=61.774, acc=0.691, loss=68.311, backward_time=0.762, grad_norm=122.324, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.703e-05, train_time=2.158
+[gpua003:0/64] 2023-07-07 09:46:26,252 (trainer:732) INFO: 23epoch:train:701-800batch: iter_time=9.492e-05, forward_time=0.109, loss_ctc=74.540, loss_att=56.661, acc=0.697, loss=62.025, backward_time=0.755, grad_norm=102.834, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.701e-05, train_time=2.096
+[gpua003:0/64] 2023-07-07 09:47:10,710 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-07 09:47:30,186 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 09:47:34,091 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 09:47:34,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-07 09:47:34,151 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 09:53:24,106 (trainer:732) INFO: 23epoch:train:801-900batch: iter_time=2.968, forward_time=0.137, loss_ctc=71.883, loss_att=53.675, acc=0.701, loss=59.138, backward_time=0.769, grad_norm=96.823, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.700e-05, train_time=8.356
+[gpua003:0/64] 2023-07-07 09:55:05,103 (trainer:732) INFO: 23epoch:train:901-1000batch: iter_time=1.047e-04, forward_time=0.110, loss_ctc=64.340, loss_att=51.496, acc=0.693, loss=55.349, backward_time=0.754, grad_norm=93.049, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.698e-05, train_time=2.020
+[gpua003:0/64] 2023-07-07 09:56:45,333 (trainer:732) INFO: 23epoch:train:1001-1100batch: iter_time=9.897e-05, forward_time=0.109, loss_ctc=82.590, loss_att=64.984, acc=0.698, loss=70.266, backward_time=0.751, grad_norm=115.985, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.696e-05, train_time=2.004
+[gpua003:0/64] 2023-07-07 09:58:25,170 (trainer:732) INFO: 23epoch:train:1101-1200batch: iter_time=9.800e-05, forward_time=0.109, loss_ctc=74.904, loss_att=57.611, acc=0.704, loss=62.799, backward_time=0.753, grad_norm=93.699, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.694e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 10:00:05,128 (trainer:732) INFO: 23epoch:train:1201-1300batch: iter_time=1.040e-04, forward_time=0.110, loss_ctc=77.700, loss_att=63.938, acc=0.709, loss=68.066, backward_time=0.753, grad_norm=120.473, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.692e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 10:01:45,015 (trainer:732) INFO: 23epoch:train:1301-1400batch: iter_time=1.311e-04, forward_time=0.110, loss_ctc=66.958, loss_att=50.399, acc=0.716, loss=55.367, backward_time=0.753, grad_norm=104.203, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.690e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 10:03:25,092 (trainer:732) INFO: 23epoch:train:1401-1500batch: iter_time=1.131e-04, forward_time=0.111, loss_ctc=79.489, loss_att=60.622, acc=0.688, loss=66.282, backward_time=0.753, grad_norm=110.078, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.689e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 10:05:04,751 (trainer:732) INFO: 23epoch:train:1501-1600batch: iter_time=9.681e-05, forward_time=0.109, loss_ctc=69.851, loss_att=56.849, acc=0.699, loss=60.749, backward_time=0.751, grad_norm=108.192, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.687e-05, train_time=1.993
+[gpua003:0/64] 2023-07-07 10:06:13,927 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-07 10:06:33,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 10:06:36,687 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 10:06:36,687 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-07 10:06:36,693 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 10:10:24,633 (trainer:732) INFO: 23epoch:train:1601-1700batch: iter_time=1.378, forward_time=0.109, loss_ctc=70.081, loss_att=51.925, acc=0.700, loss=57.372, backward_time=0.765, grad_norm=91.494, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.685e-05, train_time=6.397
+[gpua003:0/64] 2023-07-07 10:12:05,102 (trainer:732) INFO: 23epoch:train:1701-1800batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=70.081, loss_att=57.940, acc=0.707, loss=61.582, backward_time=0.756, grad_norm=100.076, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.683e-05, train_time=2.009
+[gpua003:0/64] 2023-07-07 10:13:45,172 (trainer:732) INFO: 23epoch:train:1801-1900batch: iter_time=1.097e-04, forward_time=0.108, loss_ctc=72.888, loss_att=55.204, acc=0.697, loss=60.509, backward_time=0.752, grad_norm=114.953, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.681e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 10:15:25,082 (trainer:732) INFO: 23epoch:train:1901-2000batch: iter_time=9.785e-05, forward_time=0.109, loss_ctc=83.550, loss_att=66.001, acc=0.695, loss=71.265, backward_time=0.753, grad_norm=99.143, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.680e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 10:17:04,949 (trainer:732) INFO: 23epoch:train:2001-2100batch: iter_time=9.886e-05, forward_time=0.110, loss_ctc=72.138, loss_att=59.700, acc=0.709, loss=63.431, backward_time=0.752, grad_norm=90.082, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.678e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 10:18:44,752 (trainer:732) INFO: 23epoch:train:2101-2200batch: iter_time=9.873e-05, forward_time=0.108, loss_ctc=71.072, loss_att=54.294, acc=0.707, loss=59.327, backward_time=0.752, grad_norm=109.276, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.676e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 10:20:24,385 (trainer:732) INFO: 23epoch:train:2201-2300batch: iter_time=9.689e-05, forward_time=0.107, loss_ctc=78.773, loss_att=58.998, acc=0.697, loss=64.930, backward_time=0.751, grad_norm=108.006, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.674e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 10:22:04,941 (trainer:732) INFO: 23epoch:train:2301-2400batch: iter_time=9.118e-05, forward_time=0.108, loss_ctc=74.926, loss_att=57.996, acc=0.700, loss=63.075, backward_time=0.752, grad_norm=123.642, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.672e-05, train_time=2.011
+[gpua003:0/64] 2023-07-07 10:23:49,235 (trainer:732) INFO: 23epoch:train:2401-2500batch: iter_time=9.225e-05, forward_time=0.107, loss_ctc=64.708, loss_att=53.380, acc=0.695, loss=56.779, backward_time=0.763, grad_norm=88.049, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.670e-05, train_time=2.086
+[gpua003:0/64] 2023-07-07 10:23:52,704 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-07 10:24:12,052 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 10:24:15,673 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 10:24:15,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-07 10:24:15,680 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 10:29:09,460 (trainer:732) INFO: 23epoch:train:2501-2600batch: iter_time=1.283, forward_time=0.109, loss_ctc=73.117, loss_att=58.099, acc=0.706, loss=62.605, backward_time=0.766, grad_norm=90.498, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.669e-05, train_time=6.404
+[gpua003:0/64] 2023-07-07 10:30:49,559 (trainer:732) INFO: 23epoch:train:2601-2700batch: iter_time=8.534e-05, forward_time=0.109, loss_ctc=64.315, loss_att=53.246, acc=0.691, loss=56.567, backward_time=0.752, grad_norm=89.794, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.667e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 10:32:29,624 (trainer:732) INFO: 23epoch:train:2701-2800batch: iter_time=8.962e-05, forward_time=0.109, loss_ctc=85.891, loss_att=63.051, acc=0.705, loss=69.903, backward_time=0.754, grad_norm=94.837, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.665e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 10:34:09,982 (trainer:732) INFO: 23epoch:train:2801-2900batch: iter_time=9.237e-05, forward_time=0.109, loss_ctc=74.116, loss_att=59.956, acc=0.701, loss=64.204, backward_time=0.753, grad_norm=89.065, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.663e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 10:35:50,367 (trainer:732) INFO: 23epoch:train:2901-3000batch: iter_time=8.691e-05, forward_time=0.109, loss_ctc=79.211, loss_att=61.574, acc=0.710, loss=66.865, backward_time=0.753, grad_norm=102.506, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.661e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 10:37:31,411 (trainer:732) INFO: 23epoch:train:3001-3100batch: iter_time=9.464e-05, forward_time=0.109, loss_ctc=65.890, loss_att=51.091, acc=0.706, loss=55.530, backward_time=0.755, grad_norm=86.526, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.660e-05, train_time=2.021
+[gpua003:0/64] 2023-07-07 10:39:12,463 (trainer:732) INFO: 23epoch:train:3101-3200batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=78.191, loss_att=58.913, acc=0.696, loss=64.697, backward_time=0.758, grad_norm=107.870, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.658e-05, train_time=2.021
+[gpua003:0/64] 2023-07-07 10:40:55,933 (trainer:732) INFO: 23epoch:train:3201-3300batch: iter_time=1.031e-04, forward_time=0.108, loss_ctc=70.556, loss_att=54.398, acc=0.709, loss=59.245, backward_time=0.758, grad_norm=88.177, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.656e-05, train_time=2.069
+[gpua003:0/64] 2023-07-07 10:41:31,292 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-07 10:41:50,605 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 10:41:54,287 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 10:41:54,287 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-07 10:41:54,293 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 10:45:45,530 (trainer:732) INFO: 23epoch:train:3301-3400batch: iter_time=1.299, forward_time=0.108, loss_ctc=67.705, loss_att=53.054, acc=0.697, loss=57.449, backward_time=0.769, grad_norm=90.741, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.654e-05, train_time=5.792
+[gpua003:0/64] 2023-07-07 10:47:25,708 (trainer:732) INFO: 23epoch:train:3401-3500batch: iter_time=9.948e-05, forward_time=0.108, loss_ctc=69.513, loss_att=54.398, acc=0.710, loss=58.932, backward_time=0.753, grad_norm=90.557, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.653e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 10:49:05,710 (trainer:732) INFO: 23epoch:train:3501-3600batch: iter_time=9.595e-05, forward_time=0.109, loss_ctc=71.035, loss_att=55.784, acc=0.705, loss=60.359, backward_time=0.754, grad_norm=89.579, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.651e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 10:51:01,341 (trainer:732) INFO: 23epoch:train:3601-3700batch: iter_time=9.044e-05, forward_time=0.119, loss_ctc=84.230, loss_att=63.766, acc=0.703, loss=69.905, backward_time=0.776, grad_norm=98.539, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.649e-05, train_time=2.312
+[gpua003:0/64] 2023-07-07 10:52:56,875 (trainer:732) INFO: 23epoch:train:3701-3800batch: iter_time=6.396e-04, forward_time=0.131, loss_ctc=73.263, loss_att=59.816, acc=0.713, loss=63.850, backward_time=0.792, grad_norm=100.049, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.115, optim0_lr0=7.647e-05, train_time=2.310
+[gpua003:0/64] 2023-07-07 10:54:36,698 (trainer:732) INFO: 23epoch:train:3801-3900batch: iter_time=8.669e-05, forward_time=0.110, loss_ctc=66.313, loss_att=49.529, acc=0.713, loss=54.564, backward_time=0.753, grad_norm=92.675, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.645e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 10:56:17,417 (trainer:732) INFO: 23epoch:train:3901-4000batch: iter_time=9.704e-05, forward_time=0.112, loss_ctc=77.178, loss_att=56.536, acc=0.708, loss=62.729, backward_time=0.754, grad_norm=103.834, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.644e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 10:57:57,769 (trainer:732) INFO: 23epoch:train:4001-4100batch: iter_time=9.868e-05, forward_time=0.110, loss_ctc=72.979, loss_att=57.846, acc=0.699, loss=62.386, backward_time=0.753, grad_norm=115.524, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.642e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 10:59:19,759 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-07 10:59:39,162 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 10:59:42,862 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 10:59:42,862 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-07 10:59:42,869 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 11:04:06,757 (trainer:732) INFO: 23epoch:train:4101-4200batch: iter_time=2.609, forward_time=0.130, loss_ctc=65.738, loss_att=48.149, acc=0.704, loss=53.426, backward_time=0.768, grad_norm=87.940, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.115, optim0_lr0=7.640e-05, train_time=7.379
+[gpua003:0/64] 2023-07-07 11:05:49,241 (trainer:732) INFO: 23epoch:train:4201-4300batch: iter_time=1.057e-04, forward_time=0.109, loss_ctc=68.915, loss_att=58.433, acc=0.703, loss=61.578, backward_time=0.757, grad_norm=94.575, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.638e-05, train_time=2.050
+[gpua003:0/64] 2023-07-07 11:07:30,444 (trainer:732) INFO: 23epoch:train:4301-4400batch: iter_time=1.040e-04, forward_time=0.111, loss_ctc=71.161, loss_att=54.889, acc=0.690, loss=59.771, backward_time=0.754, grad_norm=100.822, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.636e-05, train_time=2.024
+[gpua003:0/64] 2023-07-07 11:09:10,593 (trainer:732) INFO: 23epoch:train:4401-4500batch: iter_time=1.108e-04, forward_time=0.110, loss_ctc=82.303, loss_att=66.740, acc=0.686, loss=71.409, backward_time=0.755, grad_norm=94.738, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.635e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 11:10:50,539 (trainer:732) INFO: 23epoch:train:4501-4600batch: iter_time=1.004e-04, forward_time=0.110, loss_ctc=71.919, loss_att=59.048, acc=0.707, loss=62.910, backward_time=0.753, grad_norm=85.604, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.633e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 11:12:30,296 (trainer:732) INFO: 23epoch:train:4601-4700batch: iter_time=1.109e-04, forward_time=0.110, loss_ctc=70.547, loss_att=55.273, acc=0.703, loss=59.856, backward_time=0.752, grad_norm=92.197, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.631e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 11:14:10,064 (trainer:732) INFO: 23epoch:train:4701-4800batch: iter_time=9.679e-05, forward_time=0.109, loss_ctc=78.198, loss_att=57.584, acc=0.701, loss=63.768, backward_time=0.754, grad_norm=100.076, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.629e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 11:15:50,073 (trainer:732) INFO: 23epoch:train:4801-4900batch: iter_time=1.089e-04, forward_time=0.110, loss_ctc=73.676, loss_att=58.368, acc=0.689, loss=62.960, backward_time=0.754, grad_norm=105.898, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.628e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 11:17:33,946 (trainer:732) INFO: 23epoch:train:4901-5000batch: iter_time=9.428e-05, forward_time=0.109, loss_ctc=64.333, loss_att=54.483, acc=0.690, loss=57.438, backward_time=0.757, grad_norm=94.071, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.626e-05, train_time=2.077
+[gpua003:0/64] 2023-07-07 11:17:39,457 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-07 11:17:59,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 11:18:02,629 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 11:18:02,629 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-07 11:18:02,635 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 11:23:09,216 (trainer:732) INFO: 23epoch:train:5001-5100batch: iter_time=1.440, forward_time=0.127, loss_ctc=71.836, loss_att=56.120, acc=0.706, loss=60.835, backward_time=0.765, grad_norm=102.820, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.624e-05, train_time=6.705
+[gpua003:0/64] 2023-07-07 11:24:49,430 (trainer:732) INFO: 23epoch:train:5101-5200batch: iter_time=9.982e-05, forward_time=0.108, loss_ctc=63.033, loss_att=52.578, acc=0.690, loss=55.715, backward_time=0.754, grad_norm=77.578, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.622e-05, train_time=2.004
+gpua087:2330954:2332476 [1] NCCL INFO comm 0xbc380f30 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua055:3866105:3867680 [2] NCCL INFO comm 0xa0bacc0 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua087:2330955:2332481 [2] NCCL INFO comm 0x1091ecd0 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua031:1680702:1682220 [2] NCCL INFO comm 0x90042a50 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua055:3866104:3867675 [1] NCCL INFO comm 0x4ff24650 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua028:3269322:3270845 [1] NCCL INFO comm 0x50ff9ba0 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua025:63838:65355 [2] NCCL INFO comm 0xc1f876b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua060:2854969:2856486 [1] NCCL INFO comm 0x8c2cb6d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua025:63837:65357 [1] NCCL INFO comm 0xa196ac90 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua060:2854970:2856496 [2] NCCL INFO comm 0xb4b68d30 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua003:350635:352158 [2] NCCL INFO comm 0xc165ff50 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua003:350634:352156 [1] NCCL INFO comm 0xb8217e10 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua028:3269323:3270853 [2] NCCL INFO comm 0x4fe1d010 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua053:959076:960598 [2] NCCL INFO comm 0xa5547430 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua090:2294099:2295633 [2] NCCL INFO comm 0x508070c0 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua029:1226922:1228446 [1] NCCL INFO comm 0x91446d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua029:1226923:1228448 [2] NCCL INFO comm 0x9682050 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua057:1814426:1815949 [1] NCCL INFO comm 0xb6887810 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua057:1814427:1815959 [2] NCCL INFO comm 0x8ff8bf0 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua035:1685218:1686747 [2] NCCL INFO comm 0x5149e590 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua035:1685217:1686742 [1] NCCL INFO comm 0x94073350 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua098:2101209:2102740 [1] NCCL INFO comm 0xb77452f0 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua005:322786:324303 [1] NCCL INFO comm 0x9e527b50 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua005:322787:324304 [2] NCCL INFO comm 0xa671d450 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua098:2101210:2102744 [2] NCCL INFO comm 0xb13e4b0 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua010:1622002:1623518 [2] NCCL INFO comm 0x95597d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua074:989793:991318 [2] NCCL INFO comm 0x50124340 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+Process SpawnProcess-2:
+gpua090:2294098:2295630 [1] NCCL INFO comm 0xb9291470 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua031:1680701:1682217 [1] NCCL INFO comm 0xb74170b0 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 45] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804271 milliseconds before timing out.
+gpua053:959075:960591 [1] NCCL INFO comm 0x50f9bf70 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Process SpawnProcess-2:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 21] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804411 milliseconds before timing out.
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 17] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804302 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 14] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804276 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 1] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804307 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 53] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804259 milliseconds before timing out.
+Process SpawnProcess-3:
+Process SpawnProcess-3:
+Traceback (most recent call last):
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+RuntimeError: [Rank 2] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804368 milliseconds before timing out.
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 46] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804297 milliseconds before timing out.
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 37] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804277 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 29] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804484 milliseconds before timing out. 
+Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 13] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804273 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 50] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804680 milliseconds before timing out. 
+Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 6] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804607 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 30] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804538 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 26] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804292 milliseconds before timing out. +gpua010:1622001:1623523 [1] NCCL INFO comm 0x8e6a9490 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua074:989792:991309 [1] NCCL INFO comm 0x91b8e50 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 5] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804559 milliseconds before timing out. 
+Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 41] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804434 milliseconds before timing out. +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 42] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804489 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): +Process SpawnProcess-3: + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 18] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804353 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) +RuntimeError: [Rank 34] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804440 milliseconds before timing out. 
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 58] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804403 milliseconds before timing out. +Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 61] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804595 milliseconds before timing out. 
+Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 62] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804654 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 10] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804678 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 22] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804467 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 38] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804274 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 54] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804266 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 25] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805539 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 33] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805578 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 57] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805585 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 9] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805928 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 49] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805925 milliseconds before timing out. 
+gpua005:322788:324302 [3] NCCL INFO comm 0xb7586590 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua053:959077:960604 [3] NCCL INFO comm 0x8f7ecf20 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+RuntimeError: [Rank 7] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1816754 milliseconds before timing out.
+gpua087:2330956:2332486 [3] NCCL INFO comm 0x4fa40250 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua025:63839:65363 [3] NCCL INFO comm 0xc1e534d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua029:1226924:1228445 [3] NCCL INFO comm 0x502a1280 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
[Each launcher process printed the same ProcessExitedException traceback; only the distinct final lines, remaining RuntimeError lines, and NCCL abort notices are kept below, in order.]
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+RuntimeError: [Rank 55] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817525 milliseconds before timing out.
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+RuntimeError: [Rank 35] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817402 milliseconds before timing out.
+gpua098:2101211:2102741 [3] NCCL INFO comm 0xb9e844a0 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua028:3269324:3270856 [3] NCCL INFO comm 0x50758ff0 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+RuntimeError: [Rank 15] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817790 milliseconds before timing out.
+gpua074:989794:991315 [3] NCCL INFO comm 0x51823d90 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
"/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main + return _run_code(code, main_globals, None, + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code + exec(code, run_globals) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in + main() + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main + S2TTask.main(cmd=cmd) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main + while not ProcessContext(processes, error_queues).join(): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join + raise ProcessExitedException( +torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1 +srun: error: gpua005: task 1: Exited with exit code 1 +srun: error: gpua029: task 5: Exited with exit code 1 +srun: error: gpua025: task 3: Exited with exit code 1 +srun: error: gpua060: task 11: Exited with exit code 1 +srun: error: gpua035: task 7: Exited with exit code 1 +srun: error: gpua003: task 0: Exited with exit code 1 +srun: error: gpua055: task 9: Exited with exit code 1 +srun: error: gpua010: task 2: Exited with exit code 1 +srun: error: gpua087: task 13: Exited with exit code 1 +srun: error: gpua057: task 10: Exited with exit code 1 +srun: error: gpua031: task 6: Exited with exit code 1 +srun: error: gpua090: task 14: Exited with exit code 1 +srun: error: gpua053: task 8: Exited with exit code 1 +srun: error: gpua028: task 4: Exited with exit code 1 +srun: error: gpua098: task 15: Exited with exit code 1 +srun: error: gpua074: task 12: Exited with exit code 1 +# Accounting: begin_time=1688614643 +# Accounting: end_time=1688748923 +# Accounting: time=134280 threads=1 +# Finished at Fri Jul 7 11:55:23 CDT 2023 with status 1 diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.8.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.8.log new file mode 100644 index 0000000000000000000000000000000000000000..ffd1fa591021c1d02652569656e11521a9eca358 --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.8.log @@ -0,0 +1,4904 @@ +# Running on gpub015.delta.ncsa.illinois.edu +# Started at Tue Jul 4 13:05:28 CDT 2023 +# SLURMD_NODENAME=gpub015 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2127681 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2127681 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[015,026,031-032,036-037,049-053,078-082]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[015,026,031-032,036-037,049-053,078-082]' +# SLURM_NODE_ALIASES='(null)' +# 
SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=879691 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub015 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_c90a07cb-a80d-424b-bc3c-f044f91f1dea +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type 
exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type
exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_c90a07cb-a80d-424b-bc3c-f044f91f1dea +[gpub015:0/64] 2023-07-04 13:08:42,453 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub015:0/64] 2023-07-04 13:08:43,721 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. +[gpub015:0/64] 2023-07-04 13:08:43,750 (s2t:483) INFO: Vocabulary size: 50002 +[gpub015:0/64] 2023-07-04 13:08:58,245 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub015:0/64] 2023-07-04 13:08:58,254 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, 
out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, 
out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + 
(5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, 
elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub015:0/64] 2023-07-04 13:08:58,254 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub015:0/64] 2023-07-04 13:08:58,254 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub015:0/64] 2023-07-04 13:08:58,256 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub015:0/64] 2023-07-04 13:08:58,944 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub015:0/64] 2023-07-04 13:09:08,464 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
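The optimizer state printed above is internally consistent: 888.51 M float32 parameters occupy 888.51e6 x 4 bytes, or about 3.55 GB, and under WarmupLR(warmup_steps=10000) the logged lr of 2.5e-08 is exactly what the schedule yields at step 1 (the optimizer is printed before the checkpoint's scheduler state is restored). A minimal sketch, assuming the Noam-style rule documented for espnet2.schedulers.warmup_lr.WarmupLR:

    # Assumed WarmupLR rule:
    #   lr = base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
    def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
        return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

    print(warmup_lr(1))      # 2.5e-08  -> the "lr" printed above (step 1)
    print(warmup_lr(10000))  # 0.00025  -> peak equals initial_lr at the end of warmup
    print(warmup_lr(40000))  # 0.000125 -> decays as step**-0.5 afterwards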
"dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 13:09:08,613 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub015:0/64] 2023-07-04 13:09:08,619 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129 +[gpub015:0/64] 2023-07-04 13:09:09,108 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 13:09:09,424 (abs_task:1570) INFO: [plot_att] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 13:09:09,424 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub015:0/64] 2023-07-04 13:09:09,424 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1 +[gpub015:0/64] 2023-07-04 13:09:37,908 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth +gpub015:879780:879780 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:879780:879780 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:879780:879780 [0] NCCL INFO cudaDriverVersion 12010 +NCCL version 2.14.3+cuda11.7 +[gpub015:0/64] 2023-07-04 13:09:43,209 (trainer:284) INFO: 11/100epoch started +[gpub015:0/64] 2023-07-04 13:09:43,268 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub015:0/64] 2023-07-04 13:10:00,590 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 13:10:03,964 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub015:0/64] 2023-07-04 13:10:03,964 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub015:0/64] 2023-07-04 13:10:03,970 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpub032:3289606:3289606 [3] NCCL INFO cudaDriverVersion 12010
+gpub032:3289606:3289606 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0>
+gpub032:3289606:3289606 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub032:3289606:3289687 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0>
+gpub032:3289606:3289687 [3] NCCL INFO Using network IB
+gpub032:3289606:3289687 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub032:3289606:3289687 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpub032:3289606:3289687 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub032:3289606:3289687 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub032:3289606:3289687 [3] NCCL INFO Connected all rings
+gpub032:3289606:3289687 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub032:3289606:3289687 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub032:3289606:3289687 [3] NCCL INFO Connected all trees
+gpub032:3289606:3289687 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub032:3289606:3289687 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub032:3289606:3289687 [3] NCCL INFO comm 0x501cec20 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub015:879783:879783 [3] NCCL INFO cudaDriverVersion 12010
+gpub015:879783:879783 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:879783:879783 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:879783:879851 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:879783:879851 [3] NCCL INFO Using network IB
+gpub015:879783:879851 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub015:879783:879851 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub015:879783:879851 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub015:879783:879851 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub015:879783:879851 [3] NCCL INFO Connected all rings
+gpub015:879783:879851 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub015:879783:879851 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub015:879783:879851 [3] NCCL INFO Connected all trees
+gpub051:2913626:2913626 [3] NCCL INFO cudaDriverVersion 12010
+gpub051:2913626:2913626 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0>
+gpub051:2913626:2913626 [3] NCCL INFO NET/Plugin : No
plugin found (libnccl-net.so), using internal implementation +gpub051:2913626:2913705 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:2913626:2913705 [3] NCCL INFO Using network IB +gpub051:2913626:2913705 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub051:2913626:2913705 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub051:2913626:2913705 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub051:2913626:2913705 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub051:2913626:2913705 [3] NCCL INFO Connected all rings +gpub051:2913626:2913705 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub051:2913626:2913705 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub015:879783:879851 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:879783:879851 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:879783:879851 [3] NCCL INFO comm 0x5071eb50 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:2913626:2913705 [3] NCCL INFO Connected all trees +gpub051:2913626:2913705 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:2913626:2913705 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:2913626:2913705 [3] NCCL INFO comm 0x9e42a10 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:2913625:2913625 [2] NCCL INFO cudaDriverVersion 12010 +gpub051:2913625:2913625 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:2913625:2913625 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:2913625:2913708 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:2913625:2913708 [2] NCCL INFO Using network IB +gpub051:2913625:2913708 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub051:2913625:2913708 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub051:2913625:2913708 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub051:2913625:2913708 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub051:2913625:2913708 [2] NCCL INFO Connected all rings +gpub051:2913625:2913708 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub051:2913625:2913708 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub051:2913625:2913708 [2] NCCL INFO Connected all trees +gpub051:2913625:2913708 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:2913625:2913708 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:2913625:2913708 [2] NCCL INFO comm 0xb9b5ccd0 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub080:4113204:4113204 [1] NCCL INFO cudaDriverVersion 12010 +gpub080:4113204:4113204 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:4113204:4113204 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:4113204:4113287 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:4113204:4113287 [1] NCCL INFO Using network IB +gpub080:4113204:4113287 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub080:4113204:4113287 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub080:4113204:4113287 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub080:4113204:4113287 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via 
P2P/IPC +gpub080:4113204:4113287 [1] NCCL INFO Connected all rings +gpub080:4113204:4113287 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub080:4113204:4113287 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub080:4113204:4113287 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub080:4113204:4113287 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub080:4113204:4113287 [1] NCCL INFO Connected all trees +gpub080:4113204:4113287 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub080:4113204:4113287 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub080:4113204:4113287 [1] NCCL INFO comm 0xb71b4bf0 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:2657933:2657933 [1] NCCL INFO cudaDriverVersion 12010 +gpub079:2657933:2657933 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2657933:2657933 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2657933:2658006 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2657933:2658006 [1] NCCL INFO Using network IB +gpub079:2657933:2658006 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub079:2657933:2658006 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub079:2657933:2658006 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub079:2657933:2658006 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub079:2657933:2658006 [1] NCCL INFO Connected all rings +gpub079:2657933:2658006 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub079:2657933:2658006 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub079:2657933:2658006 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub079:2657933:2658006 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub079:2657933:2658006 [1] NCCL INFO Connected all trees +gpub079:2657933:2658006 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2657933:2658006 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2657933:2658006 [1] NCCL INFO comm 0x8f776d0 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub052:1901667:1901667 [0] NCCL INFO cudaDriverVersion 12010 +gpub052:1901667:1901667 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:1901667:1901667 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:1901667:1901752 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:1901667:1901752 [0] NCCL INFO Using network IB +gpub052:1901667:1901752 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub052:1901667:1901752 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub052:1901667:1901752 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub052:1901667:1901752 [0] NCCL INFO Connected all rings +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 
+gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Connected all trees +gpub052:1901667:1901752 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:1901667:1901752 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:1901667:1901752 [0] NCCL INFO comm 0xbc2124a0 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:1921204:1921204 [0] NCCL INFO cudaDriverVersion 12010 +gpub031:1921204:1921204 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1921204:1921204 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1921204:1921285 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1921204:1921285 [0] NCCL INFO Using network IB +gpub031:1921204:1921285 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub031:1921204:1921285 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub031:1921204:1921285 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub031:1921204:1921285 [0] NCCL INFO Connected all rings +gpub031:1921204:1921285 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Connected all trees +gpub031:1921204:1921285 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1921204:1921285 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1921204:1921285 [0] NCCL INFO comm 0xb63f1750 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:1921207:1921207 [3] NCCL INFO cudaDriverVersion 12010 +gpub031:1921207:1921207 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1921207:1921207 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1921207:1921286 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1921207:1921286 [3] NCCL INFO Using network IB +gpub031:1921207:1921286 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub031:1921207:1921286 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub031:1921207:1921286 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub031:1921207:1921286 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub031:1921207:1921286 [3] NCCL INFO Connected all rings +gpub031:1921207:1921286 [3] NCCL INFO Channel 00/0 
: 11[c7000] -> 10[85000] via P2P/IPC +gpub031:1921207:1921286 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub031:1921207:1921286 [3] NCCL INFO Connected all trees +gpub031:1921207:1921286 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1921207:1921286 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1921207:1921286 [3] NCCL INFO comm 0x9451c60 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:1901668:1901668 [1] NCCL INFO cudaDriverVersion 12010 +gpub052:1901668:1901668 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:1901668:1901668 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:1901668:1901749 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:1901668:1901749 [1] NCCL INFO Using network IB +gpub052:1901668:1901749 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub052:1901668:1901749 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub052:1901668:1901749 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub052:1901668:1901749 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub052:1901668:1901749 [1] NCCL INFO Connected all rings +gpub052:1901668:1901749 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub052:1901668:1901749 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub052:1901668:1901749 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub052:1901668:1901749 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub052:1901668:1901749 [1] NCCL INFO Connected all trees +gpub052:1901668:1901749 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:1901668:1901749 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:1901668:1901749 [1] NCCL INFO comm 0x50134230 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub080:4113203:4113203 [0] NCCL INFO cudaDriverVersion 12010 +gpub080:4113203:4113203 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:4113203:4113203 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:4113203:4113290 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:4113203:4113290 [0] NCCL INFO Using network IB +gpub080:4113203:4113290 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub080:4113203:4113290 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub080:4113203:4113290 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub080:4113203:4113290 [0] NCCL INFO Connected all rings +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 
+gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Connected all trees +gpub080:4113203:4113290 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub080:4113203:4113290 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub080:4113203:4113290 [0] NCCL INFO comm 0xa21d7f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub079:2657935:2657935 [3] NCCL INFO cudaDriverVersion 12010 +gpub079:2657935:2657935 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2657935:2657935 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2657935:2658008 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2657935:2658008 [3] NCCL INFO Using network IB +gpub079:2657935:2658008 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub079:2657935:2658008 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub079:2657935:2658008 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub079:2657935:2658008 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub079:2657935:2658008 [3] NCCL INFO Connected all rings +gpub079:2657935:2658008 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub079:2657935:2658008 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub079:2657935:2658008 [3] NCCL INFO Connected all trees +gpub079:2657935:2658008 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2657935:2658008 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2657935:2658008 [3] NCCL INFO comm 0x4edd83d0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub032:3289605:3289605 [2] NCCL INFO cudaDriverVersion 12010 +gpub032:3289605:3289605 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3289605:3289605 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3289605:3289686 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3289605:3289686 [2] NCCL INFO Using network IB +gpub032:3289605:3289686 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub032:3289605:3289686 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub032:3289605:3289686 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub032:3289605:3289686 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub032:3289605:3289686 [2] NCCL INFO Connected all rings +gpub032:3289605:3289686 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub032:3289605:3289686 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub032:3289605:3289686 [2] NCCL INFO Connected all trees +gpub032:3289605:3289686 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub032:3289605:3289686 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub032:3289605:3289686 [2] NCCL INFO comm 0xb6f8bc90 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub032:3289604:3289604 [1] NCCL INFO cudaDriverVersion 12010 +gpub032:3289604:3289604 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3289604:3289604 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3289604:3289685 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3289604:3289685 [1] NCCL INFO 
Using network IB +gpub032:3289604:3289685 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub032:3289604:3289685 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub032:3289604:3289685 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub032:3289604:3289685 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub032:3289604:3289685 [1] NCCL INFO Connected all rings +gpub032:3289604:3289685 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub032:3289604:3289685 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub032:3289604:3289685 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub032:3289604:3289685 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub032:3289604:3289685 [1] NCCL INFO Connected all trees +gpub032:3289604:3289685 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub032:3289604:3289685 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub032:3289604:3289685 [1] NCCL INFO comm 0x50c34690 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:2657932:2657932 [0] NCCL INFO cudaDriverVersion 12010 +gpub079:2657932:2657932 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2657932:2657932 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2657932:2658009 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2657932:2658009 [0] NCCL INFO Using network IB +gpub079:2657932:2658009 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub079:2657932:2658009 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub079:2657932:2658009 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub079:2657932:2658009 [0] NCCL INFO Connected all rings +gpub079:2657932:2658009 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Connected all trees +gpub079:2657932:2658009 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2657932:2658009 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2657932:2658009 [0] NCCL INFO comm 0x8c890be0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:1921205:1921205 [1] NCCL INFO cudaDriverVersion 12010 +gpub031:1921205:1921205 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1921205:1921205 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1921205:1921287 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1921205:1921287 [1] NCCL INFO Using network IB +gpub031:1921205:1921287 [1] NCCL 
INFO Setting affinity for GPU 1 to ffff,00000000 +gpub031:1921205:1921287 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub031:1921205:1921287 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub031:1921205:1921287 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub031:1921205:1921287 [1] NCCL INFO Connected all rings +gpub031:1921205:1921287 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub031:1921205:1921287 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub031:1921205:1921287 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub031:1921205:1921287 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub031:1921205:1921287 [1] NCCL INFO Connected all trees +gpub031:1921205:1921287 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1921205:1921287 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1921205:1921287 [1] NCCL INFO comm 0x92a3a80 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub051:2913623:2913623 [0] NCCL INFO cudaDriverVersion 12010 +gpub051:2913623:2913623 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:2913623:2913623 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:2913623:2913706 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:2913623:2913706 [0] NCCL INFO Using network IB +gpub051:2913623:2913706 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub051:2913623:2913706 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub051:2913623:2913706 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub051:2913623:2913706 [0] NCCL INFO Connected all rings +gpub051:2913623:2913706 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Connected all trees +gpub051:2913623:2913706 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:2913623:2913706 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:2913623:2913706 [0] NCCL INFO comm 0x8dc3e980 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:1879226:1879226 [1] NCCL INFO cudaDriverVersion 12010 +gpub050:1879226:1879226 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:1879226:1879226 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:1879226:1879305 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:1879226:1879305 [1] NCCL INFO Using network IB +gpub050:1879226:1879305 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub050:1879226:1879305 
[1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub050:1879226:1879305 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Connected all rings +gpub050:1879226:1879305 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpub050:1879226:1879305 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpub080:4113206:4113206 [3] NCCL INFO cudaDriverVersion 12010 +gpub080:4113206:4113206 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:4113206:4113206 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:4113206:4113289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:4113206:4113289 [3] NCCL INFO Using network IB +gpub080:4113206:4113289 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub080:4113206:4113289 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub080:4113206:4113289 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub080:4113206:4113289 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub080:4113206:4113289 [3] NCCL INFO Connected all rings +gpub080:4113206:4113289 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub080:4113206:4113289 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Connected all trees +gpub050:1879226:1879305 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:1879226:1879305 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:1879226:1879305 [1] NCCL INFO comm 0x50792660 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub080:4113206:4113289 [3] NCCL INFO Connected all trees +gpub080:4113206:4113289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub080:4113206:4113289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub080:4113206:4113289 [3] NCCL INFO comm 0x8c72c2a0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub050:1879227:1879227 [2] NCCL INFO cudaDriverVersion 12010 +gpub050:1879227:1879227 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:1879227:1879227 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:1879227:1879304 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:1879227:1879304 [2] NCCL INFO Using network IB +gpub050:1879227:1879304 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub050:1879227:1879304 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpub050:1879227:1879304 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub050:1879227:1879304 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub050:1879227:1879304 [2] NCCL INFO Connected all rings +gpub050:1879227:1879304 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub050:1879227:1879304 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub049:4064877:4064877 [3] NCCL INFO cudaDriverVersion 12010 +gpub049:4064877:4064877 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:4064877:4064877 [3] NCCL 
INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:4064877:4064941 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:4064877:4064941 [3] NCCL INFO Using network IB +gpub049:4064877:4064941 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub049:4064877:4064941 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub049:4064877:4064941 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub049:4064877:4064941 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub049:4064877:4064941 [3] NCCL INFO Connected all rings +gpub049:4064877:4064941 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub049:4064877:4064941 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub050:1879227:1879304 [2] NCCL INFO Connected all trees +gpub050:1879227:1879304 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:1879227:1879304 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:1879227:1879304 [2] NCCL INFO comm 0x50baa200 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:4064877:4064941 [3] NCCL INFO Connected all trees +gpub049:4064877:4064941 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:4064877:4064941 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:4064877:4064941 [3] NCCL INFO comm 0x4f5c00a0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub079:2657934:2657934 [2] NCCL INFO cudaDriverVersion 12010 +gpub079:2657934:2657934 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2657934:2657934 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2657934:2658007 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2657934:2658007 [2] NCCL INFO Using network IB +gpub079:2657934:2658007 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub079:2657934:2658007 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub079:2657934:2658007 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub079:2657934:2658007 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub079:2657934:2658007 [2] NCCL INFO Connected all rings +gpub079:2657934:2658007 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub079:2657934:2658007 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub079:2657934:2658007 [2] NCCL INFO Connected all trees +gpub079:2657934:2658007 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2657934:2658007 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2657934:2658007 [2] NCCL INFO comm 0x505ec9b0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub015:879782:879782 [2] NCCL INFO cudaDriverVersion 12010 +gpub015:879782:879782 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:879782:879782 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:879782:879850 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0> +gpub015:879782:879850 [2] NCCL INFO Using network IB +gpub015:879782:879850 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub015:879782:879850 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub015:879782:879850 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub015:879782:879850 
[2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub015:879782:879850 [2] NCCL INFO Connected all rings +gpub015:879782:879850 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub015:879782:879850 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub015:879782:879850 [2] NCCL INFO Connected all trees +gpub015:879782:879850 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:879782:879850 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:879782:879850 [2] NCCL INFO comm 0x502ad7c0 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub036:1870498:1870498 [2] NCCL INFO cudaDriverVersion 12010 +gpub036:1870498:1870498 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.136<0> +gpub036:1870498:1870498 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub036:1870498:1870579 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.136<0> +gpub036:1870498:1870579 [2] NCCL INFO Using network IB +gpub036:1870498:1870579 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub036:1870498:1870579 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub036:1870498:1870579 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub036:1870498:1870579 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub036:1870498:1870579 [2] NCCL INFO Connected all rings +gpub036:1870498:1870579 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub036:1870498:1870579 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub036:1870498:1870579 [2] NCCL INFO Connected all trees +gpub036:1870498:1870579 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub036:1870498:1870579 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub036:1870498:1870579 [2] NCCL INFO comm 0x50c66a10 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub031:1921206:1921206 [2] NCCL INFO cudaDriverVersion 12010 +gpub031:1921206:1921206 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1921206:1921206 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1921206:1921288 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1921206:1921288 [2] NCCL INFO Using network IB +gpub031:1921206:1921288 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub031:1921206:1921288 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub031:1921206:1921288 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub031:1921206:1921288 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub031:1921206:1921288 [2] NCCL INFO Connected all rings +gpub031:1921206:1921288 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub031:1921206:1921288 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub031:1921206:1921288 [2] NCCL INFO Connected all trees +gpub031:1921206:1921288 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1921206:1921288 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1921206:1921288 [2] NCCL INFO comm 0xc2e65190 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub082:1518447:1518447 [2] NCCL INFO cudaDriverVersion 12010 +gpub082:1518447:1518447 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:1518447:1518447 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), 
using internal implementation +gpub082:1518447:1518526 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.182<0> +gpub082:1518447:1518526 [2] NCCL INFO Using network IB +gpub082:1518447:1518526 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub082:1518447:1518526 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub082:1518447:1518526 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub082:1518447:1518526 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub082:1518447:1518526 [2] NCCL INFO Connected all rings +gpub082:1518447:1518526 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub082:1518447:1518526 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub082:1518447:1518526 [2] NCCL INFO Connected all trees +gpub082:1518447:1518526 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub082:1518447:1518526 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub082:1518447:1518526 [2] NCCL INFO comm 0xb6376a90 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub026:2433084:2433084 [0] NCCL INFO cudaDriverVersion 12010 +gpub026:2433084:2433084 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2433084:2433084 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2433084:2433166 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2433084:2433166 [0] NCCL INFO Using network IB +gpub026:2433084:2433166 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub026:2433084:2433166 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub026:2433084:2433166 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub026:2433084:2433166 [0] NCCL INFO Connected all rings +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Connected all trees +gpub026:2433084:2433166 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2433084:2433166 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2433084:2433166 [0] NCCL INFO comm 0x4fe36690 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub049:4064874:4064874 [0] NCCL INFO cudaDriverVersion 12010 +gpub049:4064874:4064874 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:4064874:4064874 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:4064874:4064942 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:4064874:4064942 [0] NCCL INFO Using network IB +gpub049:4064874:4064942 [0] NCCL INFO Setting affinity for GPU 0 to 
ffff0000,00000000 +gpub049:4064874:4064942 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub049:4064874:4064942 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub049:4064874:4064942 [0] NCCL INFO Connected all rings +gpub049:4064874:4064942 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Connected all trees +gpub049:4064874:4064942 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:4064874:4064942 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:4064874:4064942 [0] NCCL INFO comm 0x500f4c60 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub082:1518446:1518446 [1] NCCL INFO cudaDriverVersion 12010 +gpub082:1518446:1518446 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:1518446:1518446 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub082:1518446:1518525 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.182<0> +gpub082:1518446:1518525 [1] NCCL INFO Using network IB +gpub082:1518446:1518525 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub082:1518446:1518525 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub082:1518446:1518525 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub082:1518446:1518525 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub082:1518446:1518525 [1] NCCL INFO Connected all rings +gpub082:1518446:1518525 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub082:1518446:1518525 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub082:1518446:1518525 [1] NCCL INFO Connected all trees +gpub082:1518446:1518525 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub082:1518446:1518525 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub082:1518446:1518525 [1] NCCL INFO comm 0xb6caaae0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub036:1870496:1870496 [0] NCCL INFO cudaDriverVersion 12010 +gpub036:1870496:1870496 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.136<0> +gpub036:1870496:1870496 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub036:1870496:1870578 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.136<0> +gpub036:1870496:1870578 [0] NCCL INFO Using network IB +gpub036:1870496:1870578 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub036:1870496:1870578 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO 
Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub036:1870496:1870578 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub036:1870496:1870578 [0] NCCL INFO Connected all rings +gpub036:1870496:1870578 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Connected all trees +gpub036:1870496:1870578 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub036:1870496:1870578 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub036:1870496:1870578 [0] NCCL INFO comm 0xad17bd0 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub081:2742227:2742227 [0] NCCL INFO cudaDriverVersion 12010 +gpub081:2742227:2742227 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.181<0> +gpub081:2742227:2742227 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub081:2742227:2742317 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.181<0> +gpub081:2742227:2742317 [0] NCCL INFO Using network IB +gpub081:2742227:2742317 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub081:2742227:2742317 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub081:2742227:2742317 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub081:2742227:2742317 [0] NCCL INFO Connected all rings +gpub026:2433087:2433087 [3] NCCL INFO cudaDriverVersion 12010 +gpub026:2433087:2433087 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2433087:2433087 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2433087:2433163 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2433087:2433163 [3] NCCL INFO Using network IB +gpub026:2433087:2433163 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub026:2433087:2433163 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub026:2433087:2433163 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub026:2433087:2433163 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub026:2433087:2433163 [3] NCCL INFO Connected all rings +gpub026:2433087:2433163 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub026:2433087:2433163 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub081:2742227:2742317 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 
+gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Connected all trees +gpub081:2742227:2742317 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub081:2742227:2742317 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub081:2742227:2742317 [0] NCCL INFO comm 0x518b4950 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub026:2433087:2433163 [3] NCCL INFO Connected all trees +gpub026:2433087:2433163 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2433087:2433163 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2433087:2433163 [3] NCCL INFO comm 0x50347080 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:2913624:2913624 [1] NCCL INFO cudaDriverVersion 12010 +gpub051:2913624:2913624 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:2913624:2913624 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:2913624:2913707 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:2913624:2913707 [1] NCCL INFO Using network IB +gpub051:2913624:2913707 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub051:2913624:2913707 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub051:2913624:2913707 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub051:2913624:2913707 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub051:2913624:2913707 [1] NCCL INFO Connected all rings +gpub051:2913624:2913707 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub051:2913624:2913707 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub051:2913624:2913707 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub051:2913624:2913707 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub051:2913624:2913707 [1] NCCL INFO Connected all trees +gpub051:2913624:2913707 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:2913624:2913707 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:2913624:2913707 [1] NCCL INFO comm 0xbb329750 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub052:1901670:1901670 [3] NCCL INFO cudaDriverVersion 12010 +gpub052:1901670:1901670 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:1901670:1901670 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:1901670:1901750 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:1901670:1901750 [3] NCCL INFO Using network IB +gpub052:1901670:1901750 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub052:1901670:1901750 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub052:1901670:1901750 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub052:1901670:1901750 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub052:1901670:1901750 [3] NCCL INFO Connected all rings +gpub052:1901670:1901750 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub052:1901670:1901750 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC 
+gpub052:1901670:1901750 [3] NCCL INFO Connected all trees +gpub052:1901670:1901750 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:1901670:1901750 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:1901670:1901750 [3] NCCL INFO comm 0xb6ced700 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub036:1870497:1870497 [1] NCCL INFO cudaDriverVersion 12010 +gpub036:1870497:1870497 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.136<0> +gpub036:1870497:1870497 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub036:1870497:1870580 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.136<0> +gpub036:1870497:1870580 [1] NCCL INFO Using network IB +gpub036:1870497:1870580 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub036:1870497:1870580 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub036:1870497:1870580 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub036:1870497:1870580 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub036:1870497:1870580 [1] NCCL INFO Connected all rings +gpub036:1870497:1870580 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub036:1870497:1870580 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub036:1870497:1870580 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub036:1870497:1870580 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub036:1870497:1870580 [1] NCCL INFO Connected all trees +gpub036:1870497:1870580 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub036:1870497:1870580 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub036:1870497:1870580 [1] NCCL INFO comm 0x4fcaadc0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub078:4170391:4170391 [2] NCCL INFO cudaDriverVersion 12010 +gpub078:4170391:4170391 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:4170391:4170391 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:4170391:4170469 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:4170391:4170469 [2] NCCL INFO Using network IB +gpub078:4170391:4170469 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub078:4170391:4170469 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub078:4170391:4170469 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub078:4170391:4170469 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub078:4170391:4170469 [2] NCCL INFO Connected all rings +gpub078:4170391:4170469 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub078:4170391:4170469 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub078:4170391:4170469 [2] NCCL INFO Connected all trees +gpub078:4170391:4170469 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:4170391:4170469 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:4170391:4170469 [2] NCCL INFO comm 0x5187a990 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub082:1518448:1518448 [3] NCCL INFO cudaDriverVersion 12010 +gpub082:1518448:1518448 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:1518448:1518448 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub082:1518448:1518524 [3] NCCL INFO NET/IB : 
Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.182<0> +gpub082:1518448:1518524 [3] NCCL INFO Using network IB +gpub082:1518448:1518524 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub082:1518448:1518524 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub082:1518448:1518524 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub082:1518448:1518524 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub082:1518448:1518524 [3] NCCL INFO Connected all rings +gpub082:1518448:1518524 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub082:1518448:1518524 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub082:1518448:1518524 [3] NCCL INFO Connected all trees +gpub082:1518448:1518524 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub082:1518448:1518524 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub082:1518448:1518524 [3] NCCL INFO comm 0x8c5b6f90 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:1664487:1664487 [1] NCCL INFO cudaDriverVersion 12010 +gpub053:1664487:1664487 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1664487:1664487 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1664487:1664558 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1664487:1664558 [1] NCCL INFO Using network IB +gpub053:1664487:1664558 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub053:1664487:1664558 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub053:1664487:1664558 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub053:1664487:1664558 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub053:1664487:1664558 [1] NCCL INFO Connected all rings +gpub053:1664487:1664558 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub053:1664487:1664558 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub053:1664487:1664558 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub053:1664487:1664558 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub053:1664487:1664558 [1] NCCL INFO Connected all trees +gpub053:1664487:1664558 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1664487:1664558 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1664487:1664558 [1] NCCL INFO comm 0x506110d0 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub026:2433086:2433086 [2] NCCL INFO cudaDriverVersion 12010 +gpub026:2433086:2433086 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2433086:2433086 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2433086:2433164 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2433086:2433164 [2] NCCL INFO Using network IB +gpub026:2433086:2433164 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub026:2433086:2433164 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub026:2433086:2433164 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub026:2433086:2433164 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub026:2433086:2433164 [2] NCCL INFO Connected all rings +gpub026:2433086:2433164 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub026:2433086:2433164 [2] NCCL INFO Channel 01/0 : 
6[85000] -> 5[46000] via P2P/IPC +gpub026:2433086:2433164 [2] NCCL INFO Connected all trees +gpub026:2433086:2433164 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2433086:2433164 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2433086:2433164 [2] NCCL INFO comm 0xc27df910 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub080:4113205:4113205 [2] NCCL INFO cudaDriverVersion 12010 +gpub080:4113205:4113205 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:4113205:4113205 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:4113205:4113288 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:4113205:4113288 [2] NCCL INFO Using network IB +gpub080:4113205:4113288 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub080:4113205:4113288 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub080:4113205:4113288 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub080:4113205:4113288 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub080:4113205:4113288 [2] NCCL INFO Connected all rings +gpub080:4113205:4113288 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub080:4113205:4113288 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub080:4113205:4113288 [2] NCCL INFO Connected all trees +gpub080:4113205:4113288 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub080:4113205:4113288 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub080:4113205:4113288 [2] NCCL INFO comm 0x50af0e00 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub050:1879228:1879228 [3] NCCL INFO cudaDriverVersion 12010 +gpub050:1879228:1879228 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:1879228:1879228 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:1879228:1879302 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:1879228:1879302 [3] NCCL INFO Using network IB +gpub050:1879228:1879302 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub050:1879228:1879302 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub050:1879228:1879302 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub050:1879228:1879302 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub050:1879228:1879302 [3] NCCL INFO Connected all rings +gpub050:1879228:1879302 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub050:1879228:1879302 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub050:1879228:1879302 [3] NCCL INFO Connected all trees +gpub050:1879228:1879302 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:1879228:1879302 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:1879228:1879302 [3] NCCL INFO comm 0x5177dae0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub081:2742230:2742230 [3] NCCL INFO cudaDriverVersion 12010 +gpub081:2742230:2742230 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.181<0> +gpub081:2742230:2742230 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub081:2742230:2742314 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.181<0> +gpub081:2742230:2742314 [3] NCCL INFO Using network IB +gpub081:2742230:2742314 [3] NCCL INFO 
Setting affinity for GPU 3 to ffff +gpub081:2742230:2742314 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub081:2742230:2742314 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub081:2742230:2742314 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub081:2742230:2742314 [3] NCCL INFO Connected all rings +gpub081:2742230:2742314 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub081:2742230:2742314 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub081:2742230:2742314 [3] NCCL INFO Connected all trees +gpub081:2742230:2742314 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub081:2742230:2742314 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub081:2742230:2742314 [3] NCCL INFO comm 0xba992be0 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:1664488:1664488 [2] NCCL INFO cudaDriverVersion 12010 +gpub053:1664488:1664488 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1664488:1664488 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1664488:1664560 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1664488:1664560 [2] NCCL INFO Using network IB +gpub053:1664488:1664560 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub053:1664488:1664560 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub053:1664488:1664560 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub053:1664488:1664560 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub053:1664488:1664560 [2] NCCL INFO Connected all rings +gpub053:1664488:1664560 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub053:1664488:1664560 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub053:1664488:1664560 [2] NCCL INFO Connected all trees +gpub053:1664488:1664560 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1664488:1664560 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1664488:1664560 [2] NCCL INFO comm 0xe3027a0 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:4170390:4170390 [1] NCCL INFO cudaDriverVersion 12010 +gpub078:4170390:4170390 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:4170390:4170390 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:4170390:4170468 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:4170390:4170468 [1] NCCL INFO Using network IB +gpub078:4170390:4170468 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub078:4170390:4170468 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub078:4170390:4170468 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub078:4170390:4170468 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub078:4170390:4170468 [1] NCCL INFO Connected all rings +gpub078:4170390:4170468 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub078:4170390:4170468 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub078:4170390:4170468 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub078:4170390:4170468 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub078:4170390:4170468 [1] NCCL INFO Connected all trees +gpub078:4170390:4170468 [1] NCCL INFO 
threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:4170390:4170468 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:4170390:4170468 [1] NCCL INFO comm 0x1d97f440 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub078:4170389:4170389 [0] NCCL INFO cudaDriverVersion 12010 +gpub078:4170389:4170389 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:4170389:4170389 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:4170389:4170470 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:4170389:4170470 [0] NCCL INFO Using network IB +gpub078:4170389:4170470 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub078:4170389:4170470 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub078:4170389:4170470 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub078:4170389:4170470 [0] NCCL INFO Connected all rings +gpub078:4170389:4170470 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Connected all trees +gpub078:4170389:4170470 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:4170389:4170470 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:4170389:4170470 [0] NCCL INFO comm 0x4f656710 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub078:4170392:4170392 [3] NCCL INFO cudaDriverVersion 12010 +gpub078:4170392:4170392 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:4170392:4170392 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:4170392:4170471 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:4170392:4170471 [3] NCCL INFO Using network IB +gpub078:4170392:4170471 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub078:4170392:4170471 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub078:4170392:4170471 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub078:4170392:4170471 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub078:4170392:4170471 [3] NCCL INFO Connected all rings +gpub078:4170392:4170471 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub078:4170392:4170471 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub078:4170392:4170471 [3] NCCL INFO Connected all trees +gpub078:4170392:4170471 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:4170392:4170471 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:4170392:4170471 [3] NCCL INFO comm 0x4f67f390 rank 47 nranks 64 cudaDev 3 busId c7000 - 
Init COMPLETE +gpub015:879780:879852 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0> +gpub015:879780:879852 [0] NCCL INFO Using network IB +gpub015:879780:879852 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub015:879780:879852 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub015:879780:879852 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub015:879780:879852 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub015:879780:879852 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub015:879780:879852 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub015:879780:879852 [0] NCCL INFO Connected all rings +gpub015:879780:879852 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Connected all trees +gpub015:879780:879852 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:879780:879852 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:879780:879852 [0] NCCL INFO comm 0x51871d20 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub015:879781:879781 [1] NCCL INFO cudaDriverVersion 12010 +gpub015:879781:879781 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:879781:879781 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:879781:879853 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0> +gpub015:879781:879853 [1] NCCL INFO Using network IB +gpub015:879781:879853 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub015:879781:879853 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub015:879781:879853 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub015:879781:879853 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub015:879781:879853 [1] NCCL INFO Connected all rings +gpub015:879781:879853 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub015:879781:879853 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub015:879781:879853 [1] NCCL INFO Connected all trees +gpub015:879781:879853 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:879781:879853 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:879781:879853 [1] NCCL INFO comm 0x8d09c1b0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub053:1664486:1664486 [0] NCCL INFO cudaDriverVersion 12010 +gpub053:1664486:1664486 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1664486:1664486 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1664486:1664557 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1664486:1664557 [0] NCCL INFO Using network IB +gpub053:1664486:1664557 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub053:1664486:1664557 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 
41/-1/-1->40->37 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub053:1664486:1664557 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub053:1664486:1664557 [0] NCCL INFO Connected all rings +gpub053:1664486:1664557 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Connected all trees +gpub053:1664486:1664557 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1664486:1664557 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1664486:1664557 [0] NCCL INFO comm 0x4f7ecd60 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub049:4064876:4064876 [2] NCCL INFO cudaDriverVersion 12010 +gpub049:4064876:4064876 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:4064876:4064876 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:4064876:4064939 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:4064876:4064939 [2] NCCL INFO Using network IB +gpub049:4064876:4064939 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub049:4064876:4064939 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub049:4064876:4064939 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub049:4064876:4064939 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub049:4064876:4064939 [2] NCCL INFO Connected all rings +gpub049:4064876:4064939 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub049:4064876:4064939 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub049:4064876:4064939 [2] NCCL INFO Connected all trees +gpub049:4064876:4064939 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:4064876:4064939 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:4064876:4064939 [2] NCCL INFO comm 0xb89777d0 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub032:3289603:3289603 [0] NCCL INFO cudaDriverVersion 12010 +gpub032:3289603:3289603 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3289603:3289603 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3289603:3289688 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3289603:3289688 [0] NCCL INFO Using network IB +gpub032:3289603:3289688 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub032:3289603:3289688 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub032:3289603:3289688 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub032:3289603:3289688 [0] NCCL 
INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub032:3289603:3289688 [0] NCCL INFO Connected all rings
+gpub032:3289603:3289688 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Connected all trees
+gpub032:3289603:3289688 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub032:3289603:3289688 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub032:3289603:3289688 [0] NCCL INFO comm 0x9f95b40 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub081:2742228:2742228 [1] NCCL INFO cudaDriverVersion 12010
+gpub081:2742228:2742228 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.181<0>
+gpub081:2742228:2742228 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub081:2742228:2742316 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.181<0>
+gpub081:2742228:2742316 [1] NCCL INFO Using network IB
+gpub081:2742228:2742316 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub081:2742228:2742316 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpub081:2742228:2742316 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub081:2742228:2742316 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub081:2742228:2742316 [1] NCCL INFO Connected all rings
+gpub081:2742228:2742316 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0
+gpub081:2742228:2742316 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0
+gpub081:2742228:2742316 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub081:2742228:2742316 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub081:2742228:2742316 [1] NCCL INFO Connected all trees
+gpub081:2742228:2742316 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub081:2742228:2742316 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub081:2742228:2742316 [1] NCCL INFO comm 0xb78a1250 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub037:1522723:1522723 [1] NCCL INFO cudaDriverVersion 12010
+gpub037:1522723:1522723 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0>
+gpub037:1522723:1522723 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub037:1522723:1522803 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0>
+gpub037:1522723:1522803 [1] NCCL INFO Using network IB
+gpub037:1522723:1522803 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub037:1522723:1522803 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpub037:1522723:1522803 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub037:1522723:1522803 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub037:1522723:1522803 [1] NCCL INFO Connected all rings
+gpub037:1522723:1522803 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpub037:1522723:1522803 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpub037:1522723:1522803 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub037:1522723:1522803 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub037:1522723:1522803 [1] NCCL INFO Connected all trees
+gpub037:1522723:1522803 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub037:1522723:1522803 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub037:1522723:1522803 [1] NCCL INFO comm 0xba5d23a0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub050:1879225:1879225 [0] NCCL INFO cudaDriverVersion 12010
+gpub050:1879225:1879225 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:1879225:1879225 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:1879225:1879303 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:1879225:1879303 [0] NCCL INFO Using network IB
+gpub050:1879225:1879303 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub050:1879225:1879303 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpub050:1879225:1879303 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub050:1879225:1879303 [0] NCCL INFO Connected all rings
+gpub050:1879225:1879303 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Connected all trees
+gpub050:1879225:1879303 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:1879225:1879303 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:1879225:1879303 [0] NCCL INFO comm 0xa81f9440 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub052:1901669:1901669 [2] NCCL INFO cudaDriverVersion 12010
+gpub052:1901669:1901669 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0>
+gpub052:1901669:1901669 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub052:1901669:1901751 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0>
+gpub052:1901669:1901751 [2] NCCL INFO Using network IB
+gpub052:1901669:1901751 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub052:1901669:1901751 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpub052:1901669:1901751 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub052:1901669:1901751 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub052:1901669:1901751 [2] NCCL INFO Connected all rings
+gpub052:1901669:1901751 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub052:1901669:1901751 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub052:1901669:1901751 [2] NCCL INFO Connected all trees
+gpub052:1901669:1901751 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub052:1901669:1901751 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:1901669:1901751 [2] NCCL INFO comm 0x50c05250 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub037:1522724:1522724 [2] NCCL INFO cudaDriverVersion 12010
+gpub037:1522724:1522724 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0>
+gpub037:1522724:1522724 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub037:1522724:1522800 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0>
+gpub037:1522724:1522800 [2] NCCL INFO Using network IB
+gpub037:1522724:1522800 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub037:1522724:1522800 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpub037:1522724:1522800 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub037:1522724:1522800 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub037:1522724:1522800 [2] NCCL INFO Connected all rings
+gpub037:1522724:1522800 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub037:1522724:1522800 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub037:1522724:1522800 [2] NCCL INFO Connected all trees
+gpub037:1522724:1522800 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub037:1522724:1522800 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub037:1522724:1522800 [2] NCCL INFO comm 0xab8ed350 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub082:1518445:1518445 [0] NCCL INFO cudaDriverVersion 12010
+gpub082:1518445:1518445 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0>
+gpub082:1518445:1518445 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub082:1518445:1518527 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.182<0>
+gpub082:1518445:1518527 [0] NCCL INFO Using network IB
+gpub082:1518445:1518527 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub082:1518445:1518527 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpub082:1518445:1518527 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub082:1518445:1518527 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub082:1518445:1518527 [0] NCCL INFO Connected all rings
+gpub082:1518445:1518527 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Connected all trees
+gpub082:1518445:1518527 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub082:1518445:1518527 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub082:1518445:1518527 [0] NCCL INFO comm 0x519aa9d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub081:2742229:2742229 [2] NCCL INFO cudaDriverVersion 12010
+gpub081:2742229:2742229 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.181<0>
+gpub081:2742229:2742229 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub081:2742229:2742315 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.181<0>
+gpub081:2742229:2742315 [2] NCCL INFO Using network IB
+gpub081:2742229:2742315 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub081:2742229:2742315 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpub081:2742229:2742315 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub081:2742229:2742315 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub081:2742229:2742315 [2] NCCL INFO Connected all rings
+gpub081:2742229:2742315 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub081:2742229:2742315 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub081:2742229:2742315 [2] NCCL INFO Connected all trees
+gpub081:2742229:2742315 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub081:2742229:2742315 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub081:2742229:2742315 [2] NCCL INFO comm 0x50f92c00 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub026:2433085:2433085 [1] NCCL INFO cudaDriverVersion 12010
+gpub026:2433085:2433085 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0>
+gpub026:2433085:2433085 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub026:2433085:2433165 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0>
+gpub026:2433085:2433165 [1] NCCL INFO Using network IB
+gpub026:2433085:2433165 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub026:2433085:2433165 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpub026:2433085:2433165 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub026:2433085:2433165 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub026:2433085:2433165 [1] NCCL INFO Connected all rings
+gpub026:2433085:2433165 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpub026:2433085:2433165 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpub026:2433085:2433165 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub026:2433085:2433165 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub026:2433085:2433165 [1] NCCL INFO Connected all trees
+gpub026:2433085:2433165 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub026:2433085:2433165 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub026:2433085:2433165 [1] NCCL INFO comm 0xb7dab990 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub049:4064875:4064875 [1] NCCL INFO cudaDriverVersion 12010
+gpub049:4064875:4064875 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0>
+gpub049:4064875:4064875 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub049:4064875:4064940 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0>
+gpub049:4064875:4064940 [1] NCCL INFO Using network IB
+gpub049:4064875:4064940 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub049:4064875:4064940 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpub049:4064875:4064940 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub049:4064875:4064940 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub049:4064875:4064940 [1] NCCL INFO Connected all rings
+gpub049:4064875:4064940 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpub049:4064875:4064940 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpub049:4064875:4064940 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub049:4064875:4064940 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub049:4064875:4064940 [1] NCCL INFO Connected all trees
+gpub049:4064875:4064940 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub049:4064875:4064940 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub049:4064875:4064940 [1] NCCL INFO comm 0xa8769be0 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub037:1522722:1522722 [0] NCCL INFO cudaDriverVersion 12010
+gpub037:1522722:1522722 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0>
+gpub037:1522722:1522722 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub037:1522722:1522802 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0>
+gpub037:1522722:1522802 [0] NCCL INFO Using network IB
+gpub037:1522722:1522802 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub037:1522722:1522802 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpub037:1522722:1522802 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub037:1522722:1522802 [0] NCCL INFO Connected all rings
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Connected all trees
+gpub037:1522722:1522802 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub037:1522722:1522802 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub037:1522722:1522802 [0] NCCL INFO comm 0x514cae40 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub053:1664489:1664489 [3] NCCL INFO cudaDriverVersion 12010
+gpub053:1664489:1664489 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:1664489:1664489 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:1664489:1664559 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:1664489:1664559 [3] NCCL INFO Using network IB
+gpub053:1664489:1664559 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub053:1664489:1664559 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpub053:1664489:1664559 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub053:1664489:1664559 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub053:1664489:1664559 [3] NCCL INFO Connected all rings
+gpub053:1664489:1664559 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub053:1664489:1664559 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub053:1664489:1664559 [3] NCCL INFO Connected all trees
+gpub053:1664489:1664559 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:1664489:1664559 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:1664489:1664559 [3] NCCL INFO comm 0xa9e28fe0 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub037:1522725:1522725 [3] NCCL INFO cudaDriverVersion 12010
+gpub037:1522725:1522725 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0>
+gpub037:1522725:1522725 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub037:1522725:1522801 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0>
+gpub037:1522725:1522801 [3] NCCL INFO Using network IB
+gpub037:1522725:1522801 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub037:1522725:1522801 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpub037:1522725:1522801 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub037:1522725:1522801 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub037:1522725:1522801 [3] NCCL INFO Connected all rings
+gpub037:1522725:1522801 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub037:1522725:1522801 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub037:1522725:1522801 [3] NCCL INFO Connected all trees
+gpub037:1522725:1522801 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub037:1522725:1522801 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub037:1522725:1522801 [3] NCCL INFO comm 0x4f7df910 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub036:1870499:1870499 [3] NCCL INFO cudaDriverVersion 12010
+gpub036:1870499:1870499 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.136<0>
+gpub036:1870499:1870499 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub036:1870499:1870581 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.136<0>
+gpub036:1870499:1870581 [3] NCCL INFO Using network IB
+gpub036:1870499:1870581 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub036:1870499:1870581 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpub036:1870499:1870581 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub036:1870499:1870581 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub036:1870499:1870581 [3] NCCL INFO Connected all rings
+gpub036:1870499:1870581 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub036:1870499:1870581 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub036:1870499:1870581 [3] NCCL INFO Connected all trees
+gpub036:1870499:1870581 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub036:1870499:1870581 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub036:1870499:1870581 [3] NCCL INFO comm 0xa269c50 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
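The NCCL Bootstrap/NET/Trees/Channel messages above typically appear because NCCL debug logging is enabled. As a point of reference only, a minimal Python sketch of how such logging is usually switched on; the variables must be set before the first communicator is created (e.g. before torch.distributed.init_process_group), and the NCCL_DEBUG_SUBSYS value is an illustrative subset, not taken from this run:

    import os

    # Enable NCCL's own logging; "INFO" produces per-rank init/topology
    # messages like the ones above.
    os.environ["NCCL_DEBUG"] = "INFO"
    # Optionally restrict logging to particular subsystems (illustrative).
    os.environ["NCCL_DEBUG_SUBSYS"] = "INIT,NET"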
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
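For reference, the flag this warning refers to is an argument of the torch.nn.parallel.DistributedDataParallel constructor. A minimal sketch, assuming an already-initialized process group (as set up via srun in this run); the wrap_model helper and the LOCAL_RANK handling are illustrative, not ESPnet's actual trainer code:

    import os

    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP

    def wrap_model(model: torch.nn.Module) -> DDP:
        # Assumes dist.init_process_group() has already run on every rank.
        assert dist.is_initialized()
        local_rank = int(os.environ.get("LOCAL_RANK", "0"))
        torch.cuda.set_device(local_rank)
        return DDP(
            model.cuda(local_rank),
            device_ids=[local_rank],
            # The warning above fires when this is True yet no unused
            # parameters are ever found; False skips the extra per-iteration
            # traversal of the autograd graph.
            find_unused_parameters=False,
        )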
+[gpub015:0/64] 2023-07-04 13:14:42,343 (trainer:732) INFO: 11epoch:train:1-100batch: iter_time=1.167, forward_time=0.250, loss_ctc=83.800, loss_att=63.903, acc=0.671, loss=69.872, backward_time=1.249, grad_norm=95.018, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.181, optim0_lr0=1.178e-04, train_time=5.980
+[gpub015:0/64] 2023-07-04 13:17:19,812 (trainer:732) INFO: 11epoch:train:101-200batch: iter_time=9.388e-05, forward_time=0.141, loss_ctc=70.875, loss_att=53.678, acc=0.690, loss=58.837, backward_time=1.239, grad_norm=82.003, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.178e-04, train_time=3.150
+[gpub015:0/64] 2023-07-04 13:19:56,533 (trainer:732) INFO: 11epoch:train:201-300batch: iter_time=9.683e-05, forward_time=0.140, loss_ctc=73.277, loss_att=62.501, acc=0.661, loss=65.734, backward_time=1.236, grad_norm=87.548, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.177e-04, train_time=3.134
+[gpub015:0/64] 2023-07-04 13:22:35,853 (trainer:732) INFO: 11epoch:train:301-400batch: iter_time=1.014e-04, forward_time=0.141, loss_ctc=79.358, loss_att=59.074, acc=0.690, loss=65.159, backward_time=1.241, grad_norm=91.769, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.176e-04, train_time=3.186
+[gpub015:0/64] 2023-07-04 13:25:20,103 (trainer:732) INFO: 11epoch:train:401-500batch: iter_time=1.025e-04, forward_time=0.140, loss_ctc=72.801, loss_att=54.986, acc=0.664, loss=60.331, backward_time=1.248, grad_norm=99.444, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.176e-04, train_time=3.285
+[gpub015:0/64] 2023-07-04 13:28:00,155 (trainer:732) INFO: 11epoch:train:501-600batch: iter_time=9.845e-05, forward_time=0.140, loss_ctc=72.472, loss_att=58.148, acc=0.641, loss=62.446, backward_time=1.241, grad_norm=85.575, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.175e-04, train_time=3.201
+[gpub015:0/64] 2023-07-04 13:30:47,766 (trainer:732) INFO: 11epoch:train:601-700batch: iter_time=1.032e-04, forward_time=0.142, loss_ctc=74.485, loss_att=62.987, acc=0.667, loss=66.436, backward_time=1.247, grad_norm=81.333, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.174e-04, train_time=3.352
+[gpub015:0/64] 2023-07-04 13:33:37,533 (trainer:732) INFO: 11epoch:train:701-800batch: iter_time=1.023e-04, forward_time=0.140, loss_ctc=81.733, loss_att=62.947, acc=0.691, loss=68.583, backward_time=1.245, grad_norm=94.965, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.174e-04, train_time=3.395
+[gpub015:0/64] 2023-07-04 13:34:38,137 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
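The trainer:732 lines follow a regular "key=value, key=value" format after the NNepoch:train:A-Bbatch: prefix, so they are straightforward to post-process. A self-contained sketch (a hypothetical helper, not part of ESPnet) that turns one such line into a metrics dict, e.g. for plotting loss_ctc/loss_att/acc over batch windows:

    import re
    from typing import Optional

    # Matches e.g. "11epoch:train:1-100batch: iter_time=1.167, ..."
    LINE_RE = re.compile(r"(?P<epoch>\d+)epoch:train:(?P<batches>[\d-]+)batch: (?P<kv>.*)$")

    def parse_trainer_line(line: str) -> Optional[dict]:
        m = LINE_RE.search(line)
        if m is None:
            return None  # not a per-window trainer stats line
        metrics = {"epoch": int(m.group("epoch")), "batches": m.group("batches")}
        for pair in m.group("kv").split(", "):
            key, _, value = pair.partition("=")
            try:
                metrics[key] = float(value)  # handles 83.800, 2.749e+11, ...
            except ValueError:
                metrics[key] = value
        return metrics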
+[gpub015:0/64] 2023-07-04 13:34:56,174 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 13:34:59,472 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub015:0/64] 2023-07-04 13:34:59,473 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub015:0/64] 2023-07-04 13:34:59,479 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub015:0/64] 2023-07-04 13:40:59,440 (trainer:732) INFO: 11epoch:train:801-900batch: iter_time=1.311, forward_time=0.142, loss_ctc=85.490, loss_att=68.439, acc=0.664, loss=73.554, backward_time=1.245, grad_norm=86.487, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.173e-04, train_time=8.838
+[gpub015:0/64] 2023-07-04 13:43:36,689 (trainer:732) INFO: 11epoch:train:901-1000batch: iter_time=1.334e-04, forward_time=0.144, loss_ctc=72.168, loss_att=52.441, acc=0.692, loss=58.359, backward_time=1.237, grad_norm=87.029, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.172e-04, train_time=3.145
+[gpub015:0/64] 2023-07-04 13:46:13,656 (trainer:732) INFO: 11epoch:train:1001-1100batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=71.855, loss_att=63.794, acc=0.650, loss=66.212, backward_time=1.237, grad_norm=91.081, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.172e-04, train_time=3.139
+[gpub015:0/64] 2023-07-04 13:48:50,728 (trainer:732) INFO: 11epoch:train:1101-1200batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=78.754, loss_att=62.586, acc=0.672, loss=67.436, backward_time=1.237, grad_norm=94.410, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.171e-04, train_time=3.141
+[gpub015:0/64] 2023-07-04 13:51:27,702 (trainer:732) INFO: 11epoch:train:1201-1300batch: iter_time=1.265e-04, forward_time=0.144, loss_ctc=74.458, loss_att=56.373, acc=0.665, loss=61.798, backward_time=1.236, grad_norm=85.002, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.170e-04, train_time=3.139
+[gpub015:0/64] 2023-07-04 13:54:04,338 (trainer:732) INFO: 11epoch:train:1301-1400batch: iter_time=1.328e-04, forward_time=0.144, loss_ctc=74.356, loss_att=61.107, acc=0.633, loss=65.082, backward_time=1.236, grad_norm=96.927, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.170e-04, train_time=3.133
+[gpub015:0/64] 2023-07-04 13:56:40,929 (trainer:732) INFO: 11epoch:train:1401-1500batch: iter_time=1.283e-04, forward_time=0.144, loss_ctc=70.043, loss_att=58.437, acc=0.645, loss=61.919, backward_time=1.235, grad_norm=79.036, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.169e-04, train_time=3.132
+[gpub015:0/64] 2023-07-04 13:59:17,863 (trainer:732) INFO: 11epoch:train:1501-1600batch: iter_time=1.270e-04, forward_time=0.145, loss_ctc=75.925, loss_att=57.323, acc=0.695, loss=62.903, backward_time=1.237, grad_norm=80.385, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.168e-04, train_time=3.138
+[gpub015:0/64] 2023-07-04 14:01:03,303 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub015:0/64] 2023-07-04 14:01:21,149 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 14:01:24,530 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub015:0/64] 2023-07-04 14:01:24,530 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub015:0/64] 2023-07-04 14:01:24,537 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub015:0/64] 2023-07-04 14:05:06,649 (trainer:732) INFO: 11epoch:train:1601-1700batch: iter_time=1.217, forward_time=0.143, loss_ctc=84.818, loss_att=70.745, acc=0.672, loss=74.967, backward_time=1.246, grad_norm=97.726, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.168e-04, train_time=6.976
+[gpub015:0/64] 2023-07-04 14:07:44,458 (trainer:732) INFO: 11epoch:train:1701-1800batch: iter_time=1.115e-04, forward_time=0.144, loss_ctc=71.343, loss_att=51.236, acc=0.688, loss=57.268, backward_time=1.239, grad_norm=103.277, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.179, optim0_lr0=1.167e-04, train_time=3.156
+[gpub015:0/64] 2023-07-04 14:10:21,624 (trainer:732) INFO: 11epoch:train:1801-1900batch: iter_time=1.042e-04, forward_time=0.143, loss_ctc=73.993, loss_att=60.990, acc=0.670, loss=64.891, backward_time=1.239, grad_norm=98.902, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.179, optim0_lr0=1.167e-04, train_time=3.143
+[gpub015:0/64] 2023-07-04 14:12:58,635 (trainer:732) INFO: 11epoch:train:1901-2000batch: iter_time=1.055e-04, forward_time=0.143, loss_ctc=77.280, loss_att=60.422, acc=0.672, loss=65.479, backward_time=1.239, grad_norm=99.361, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.179, optim0_lr0=1.166e-04, train_time=3.140
+[gpub015:0/64] 2023-07-04 14:15:35,403 (trainer:732) INFO: 11epoch:train:2001-2100batch: iter_time=1.027e-04, forward_time=0.144, loss_ctc=71.434, loss_att=55.009, acc=0.670, loss=59.936, backward_time=1.236, grad_norm=89.022, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.165e-04, train_time=3.135
+[gpub015:0/64] 2023-07-04 14:18:12,306 (trainer:732) INFO: 11epoch:train:2101-2200batch: iter_time=9.694e-05, forward_time=0.143, loss_ctc=80.484, loss_att=62.826, acc=0.656, loss=68.123, backward_time=1.238, grad_norm=91.242, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.165e-04, train_time=3.138
+[gpub015:0/64] 2023-07-04 14:20:48,909 (trainer:732) INFO: 11epoch:train:2201-2300batch: iter_time=1.006e-04, forward_time=0.142, loss_ctc=66.251, loss_att=54.235, acc=0.643, loss=57.840, backward_time=1.236, grad_norm=75.648, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.164e-04, train_time=3.132
+[gpub015:0/64] 2023-07-04 14:23:25,954 (trainer:732) INFO: 11epoch:train:2301-2400batch: iter_time=9.957e-05, forward_time=0.143, loss_ctc=76.969, loss_att=61.945, acc=0.675, loss=66.452, backward_time=1.239, grad_norm=86.558, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.163e-04, train_time=3.141
+[gpub015:0/64] 2023-07-04 14:26:02,381 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub015:0/64] 2023-07-04 14:26:20,628 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 14:26:24,038 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub015:0/64] 2023-07-04 14:26:24,038 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub015:0/64] 2023-07-04 14:26:24,044 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub015:0/64] 2023-07-04 14:29:27,606 (trainer:732) INFO: 11epoch:train:2401-2500batch: iter_time=1.234, forward_time=0.142, loss_ctc=83.156, loss_att=65.525, acc=0.675, loss=70.814, backward_time=1.245, grad_norm=87.609, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.163e-04, train_time=7.233
+[gpub015:0/64] 2023-07-04 14:32:05,977 (trainer:732) INFO: 11epoch:train:2501-2600batch: iter_time=9.809e-05, forward_time=0.143, loss_ctc=78.756, loss_att=58.058, acc=0.689, loss=64.267, backward_time=1.245, grad_norm=89.189, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.162e-04, train_time=3.167
+[gpub015:0/64] 2023-07-04 14:34:43,361 (trainer:732) INFO: 11epoch:train:2601-2700batch: iter_time=8.955e-05, forward_time=0.141, loss_ctc=73.761, loss_att=55.772, acc=0.686, loss=61.169, backward_time=1.240, grad_norm=84.815, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.162e-04, train_time=3.147
+[gpub015:0/64] 2023-07-04 14:37:20,015 (trainer:732) INFO: 11epoch:train:2701-2800batch: iter_time=1.035e-04, forward_time=0.142, loss_ctc=73.778, loss_att=62.326, acc=0.658, loss=65.762, backward_time=1.236, grad_norm=88.821, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.161e-04, train_time=3.133
+[gpub015:0/64] 2023-07-04 14:39:56,726 (trainer:732) INFO: 11epoch:train:2801-2900batch: iter_time=1.067e-04, forward_time=0.142, loss_ctc=77.384, loss_att=58.538, acc=0.690, loss=64.192, backward_time=1.237, grad_norm=82.802, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.160e-04, train_time=3.134
+[gpub015:0/64] 2023-07-04 14:42:33,340 (trainer:732) INFO: 11epoch:train:2901-3000batch: iter_time=1.122e-04, forward_time=0.142, loss_ctc=70.876, loss_att=53.471, acc=0.667, loss=58.692, backward_time=1.236, grad_norm=78.599, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.160e-04, train_time=3.132
+[gpub015:0/64] 2023-07-04 14:45:10,899 (trainer:732) INFO: 11epoch:train:3001-3100batch: iter_time=1.203e-04, forward_time=0.142, loss_ctc=71.915, loss_att=58.489, acc=0.639, loss=62.517, backward_time=1.238, grad_norm=79.638, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.159e-04, train_time=3.151
+[gpub015:0/64] 2023-07-04 14:47:55,543 (trainer:732) INFO: 11epoch:train:3101-3200batch: iter_time=1.053e-04, forward_time=0.143, loss_ctc=73.017, loss_att=63.582, acc=0.657, loss=66.413, backward_time=1.249, grad_norm=104.209, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.158e-04, train_time=3.293
+[gpub015:0/64] 2023-07-04 14:50:39,507 (trainer:732) INFO: 11epoch:train:3201-3300batch: iter_time=1.197e-04, forward_time=0.143, loss_ctc=79.855, loss_att=60.339, acc=0.696, loss=66.194, backward_time=1.242, grad_norm=90.048, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.158e-04, train_time=3.279
+[gpub015:0/64] 2023-07-04 14:51:31,262 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub015:0/64] 2023-07-04 14:51:49,250 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 14:51:52,772 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub015:0/64] 2023-07-04 14:51:52,773 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub015:0/64] 2023-07-04 14:51:52,779 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub015:0/64] 2023-07-04 14:57:01,433 (trainer:732) INFO: 11epoch:train:3301-3400batch: iter_time=1.215, forward_time=0.143, loss_ctc=83.729, loss_att=64.933, acc=0.671, loss=70.572, backward_time=1.246, grad_norm=93.661, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.157e-04, train_time=7.638
+[gpub015:0/64] 2023-07-04 14:59:38,410 (trainer:732) INFO: 11epoch:train:3401-3500batch: iter_time=1.059e-04, forward_time=0.142, loss_ctc=72.534, loss_att=53.184, acc=0.696, loss=58.989, backward_time=1.236, grad_norm=82.069, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.157e-04, train_time=3.139
+[gpub015:0/64] 2023-07-04 15:02:15,990 (trainer:732) INFO: 11epoch:train:3501-3600batch: iter_time=1.292e-04, forward_time=0.142, loss_ctc=72.125, loss_att=60.773, acc=0.657, loss=64.179, backward_time=1.242, grad_norm=79.186, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.156e-04, train_time=3.151
+[gpub015:0/64] 2023-07-04 15:04:52,996 (trainer:732) INFO: 11epoch:train:3601-3700batch: iter_time=1.228e-04, forward_time=0.144, loss_ctc=76.260, loss_att=59.534, acc=0.681, loss=64.551, backward_time=1.239, grad_norm=93.112, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.155e-04, train_time=3.140
+[gpub015:0/64] 2023-07-04 15:07:29,645 (trainer:732) INFO: 11epoch:train:3701-3800batch: iter_time=1.134e-04, forward_time=0.143, loss_ctc=75.428, loss_att=54.548, acc=0.673, loss=60.812, backward_time=1.236, grad_norm=79.497, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.155e-04, train_time=3.133
+[gpub015:0/64] 2023-07-04 15:10:06,182 (trainer:732) INFO: 11epoch:train:3801-3900batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=71.937, loss_att=59.873, acc=0.630, loss=63.492, backward_time=1.235, grad_norm=89.457, clip=100.000, loss_scale=5.498e+11,
optim_step_time=0.178, optim0_lr0=1.154e-04, train_time=3.131 +[gpub015:0/64] 2023-07-04 15:12:43,022 (trainer:732) INFO: 11epoch:train:3901-4000batch: iter_time=1.081e-04, forward_time=0.143, loss_ctc=71.573, loss_att=63.440, acc=0.652, loss=65.880, backward_time=1.237, grad_norm=79.058, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.153e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 15:15:19,772 (trainer:732) INFO: 11epoch:train:4001-4100batch: iter_time=1.220e-04, forward_time=0.143, loss_ctc=74.521, loss_att=56.145, acc=0.691, loss=61.658, backward_time=1.237, grad_norm=80.871, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.179, optim0_lr0=1.153e-04, train_time=3.135 +[gpub015:0/64] 2023-07-04 15:17:04,683 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub015:0/64] 2023-07-04 15:17:22,602 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 15:17:26,004 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 15:17:26,005 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub015:0/64] 2023-07-04 15:17:26,011 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 15:21:23,202 (trainer:732) INFO: 11epoch:train:4101-4200batch: iter_time=1.199, forward_time=0.143, loss_ctc=81.712, loss_att=65.217, acc=0.675, loss=70.165, backward_time=1.247, grad_norm=87.365, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.179, optim0_lr0=1.152e-04, train_time=7.268 +[gpub015:0/64] 2023-07-04 15:24:00,969 (trainer:732) INFO: 11epoch:train:4201-4300batch: iter_time=1.045e-04, forward_time=0.144, loss_ctc=72.537, loss_att=51.252, acc=0.700, loss=57.638, backward_time=1.240, grad_norm=85.262, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.152e-04, train_time=3.155 +[gpub015:0/64] 2023-07-04 15:26:37,814 (trainer:732) INFO: 11epoch:train:4301-4400batch: iter_time=1.208e-04, forward_time=0.142, loss_ctc=71.997, loss_att=59.331, acc=0.679, loss=63.131, backward_time=1.238, grad_norm=103.945, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.151e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 15:29:14,481 (trainer:732) INFO: 11epoch:train:4401-4500batch: iter_time=1.185e-04, forward_time=0.143, loss_ctc=78.545, loss_att=61.794, acc=0.684, loss=66.819, backward_time=1.237, grad_norm=96.020, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.150e-04, train_time=3.133 +[gpub015:0/64] 2023-07-04 15:31:51,324 (trainer:732) INFO: 11epoch:train:4501-4600batch: iter_time=1.079e-04, forward_time=0.144, loss_ctc=72.669, loss_att=54.380, acc=0.683, loss=59.867, backward_time=1.236, grad_norm=94.169, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.150e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 15:34:28,101 (trainer:732) INFO: 11epoch:train:4601-4700batch: iter_time=1.150e-04, forward_time=0.142, 
loss_ctc=78.958, loss_att=61.475, acc=0.651, loss=66.720, backward_time=1.237, grad_norm=82.362, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.149e-04, train_time=3.135 +[gpub015:0/64] 2023-07-04 15:37:05,127 (trainer:732) INFO: 11epoch:train:4701-4800batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=67.006, loss_att=56.681, acc=0.658, loss=59.779, backward_time=1.237, grad_norm=76.677, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.149e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 15:39:42,072 (trainer:732) INFO: 11epoch:train:4801-4900batch: iter_time=1.144e-04, forward_time=0.142, loss_ctc=76.017, loss_att=60.305, acc=0.690, loss=65.018, backward_time=1.238, grad_norm=93.325, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.148e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 15:42:18,923 (trainer:732) INFO: 11epoch:train:4901-5000batch: iter_time=1.163e-04, forward_time=0.143, loss_ctc=80.694, loss_att=63.853, acc=0.683, loss=68.905, backward_time=1.239, grad_norm=94.593, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.147e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 15:42:20,304 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub015:0/64] 2023-07-04 15:42:38,424 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 15:42:41,782 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 15:42:41,782 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub015:0/64] 2023-07-04 15:42:41,788 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 15:48:19,632 (trainer:732) INFO: 11epoch:train:5001-5100batch: iter_time=1.237, forward_time=0.144, loss_ctc=78.222, loss_att=59.300, acc=0.679, loss=64.976, backward_time=1.247, grad_norm=95.238, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.147e-04, train_time=7.214 +[gpub015:0/64] 2023-07-04 15:50:56,912 (trainer:732) INFO: 11epoch:train:5101-5200batch: iter_time=1.348e-04, forward_time=0.142, loss_ctc=70.037, loss_att=54.648, acc=0.687, loss=59.265, backward_time=1.236, grad_norm=85.950, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.146e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 15:53:33,723 (trainer:732) INFO: 11epoch:train:5201-5300batch: iter_time=1.410e-04, forward_time=0.143, loss_ctc=74.060, loss_att=61.954, acc=0.662, loss=65.585, backward_time=1.236, grad_norm=92.304, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.146e-04, train_time=3.136 +[gpub015:0/64] 2023-07-04 15:56:10,431 (trainer:732) INFO: 11epoch:train:5301-5400batch: iter_time=1.411e-04, forward_time=0.144, loss_ctc=74.817, loss_att=56.868, acc=0.686, loss=62.253, backward_time=1.235, grad_norm=79.959, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.145e-04, train_time=3.134 
+[gpub015:0/64] 2023-07-04 15:58:47,203 (trainer:732) INFO: 11epoch:train:5401-5500batch: iter_time=1.433e-04, forward_time=0.144, loss_ctc=73.246, loss_att=57.581, acc=0.663, loss=62.281, backward_time=1.236, grad_norm=89.297, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.144e-04, train_time=3.135 +[gpub015:0/64] 2023-07-04 16:01:23,856 (trainer:732) INFO: 11epoch:train:5501-5600batch: iter_time=1.234e-04, forward_time=0.143, loss_ctc=69.266, loss_att=52.780, acc=0.658, loss=57.726, backward_time=1.235, grad_norm=84.838, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.144e-04, train_time=3.133 +[gpub015:0/64] 2023-07-04 16:04:00,841 (trainer:732) INFO: 11epoch:train:5601-5700batch: iter_time=1.416e-04, forward_time=0.144, loss_ctc=70.474, loss_att=61.191, acc=0.661, loss=63.976, backward_time=1.238, grad_norm=85.157, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.143e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 16:06:37,726 (trainer:732) INFO: 11epoch:train:5701-5800batch: iter_time=1.304e-04, forward_time=0.143, loss_ctc=83.723, loss_att=67.490, acc=0.687, loss=72.360, backward_time=1.237, grad_norm=82.489, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.143e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 16:07:32,415 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub015:0/64] 2023-07-04 16:07:50,213 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 16:07:53,634 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 16:07:53,634 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub015:0/64] 2023-07-04 16:07:53,641 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 16:12:26,124 (trainer:732) INFO: 11epoch:train:5801-5900batch: iter_time=1.216, forward_time=0.142, loss_ctc=78.153, loss_att=58.359, acc=0.683, loss=64.297, backward_time=1.245, grad_norm=81.488, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.142e-04, train_time=6.968 +[gpub015:0/64] 2023-07-04 16:15:03,120 (trainer:732) INFO: 11epoch:train:5901-6000batch: iter_time=1.015e-04, forward_time=0.141, loss_ctc=75.076, loss_att=52.764, acc=0.698, loss=59.457, backward_time=1.236, grad_norm=87.247, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.141e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 16:17:40,160 (trainer:732) INFO: 11epoch:train:6001-6100batch: iter_time=1.041e-04, forward_time=0.142, loss_ctc=72.470, loss_att=62.178, acc=0.673, loss=65.265, backward_time=1.237, grad_norm=83.009, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.141e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 16:20:17,104 (trainer:732) INFO: 11epoch:train:6101-6200batch: iter_time=1.004e-04, forward_time=0.144, loss_ctc=74.977, loss_att=58.241, acc=0.691, loss=63.262, backward_time=1.238, 
grad_norm=88.495, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.140e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 16:22:54,294 (trainer:732) INFO: 11epoch:train:6201-6300batch: iter_time=1.117e-04, forward_time=0.142, loss_ctc=73.711, loss_att=55.176, acc=0.683, loss=60.737, backward_time=1.237, grad_norm=84.983, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.140e-04, train_time=3.144 +[gpub015:0/64] 2023-07-04 16:25:30,906 (trainer:732) INFO: 11epoch:train:6301-6400batch: iter_time=1.066e-04, forward_time=0.142, loss_ctc=75.193, loss_att=58.962, acc=0.652, loss=63.832, backward_time=1.237, grad_norm=87.664, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.139e-04, train_time=3.132 +[gpub015:0/64] 2023-07-04 16:28:07,562 (trainer:732) INFO: 11epoch:train:6401-6500batch: iter_time=1.093e-04, forward_time=0.142, loss_ctc=67.375, loss_att=55.362, acc=0.673, loss=58.966, backward_time=1.236, grad_norm=87.593, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.138e-04, train_time=3.133 +[gpub015:0/64] 2023-07-04 16:30:44,706 (trainer:732) INFO: 11epoch:train:6501-6600batch: iter_time=1.079e-04, forward_time=0.144, loss_ctc=74.061, loss_att=54.804, acc=0.709, loss=60.581, backward_time=1.238, grad_norm=79.167, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.138e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 16:32:33,226 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub015:0/64] 2023-07-04 16:32:51,116 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 16:32:54,503 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 16:32:54,503 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub015:0/64] 2023-07-04 16:32:54,509 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 16:36:49,987 (trainer:732) INFO: 11epoch:train:6601-6700batch: iter_time=1.193, forward_time=0.144, loss_ctc=83.775, loss_att=70.165, acc=0.684, loss=74.248, backward_time=1.261, grad_norm=89.442, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.137e-04, train_time=7.305 +[gpub015:0/64] 2023-07-04 16:39:27,770 (trainer:732) INFO: 11epoch:train:6701-6800batch: iter_time=1.102e-04, forward_time=0.142, loss_ctc=72.289, loss_att=51.250, acc=0.688, loss=57.562, backward_time=1.238, grad_norm=89.186, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.137e-04, train_time=3.155 +[gpub015:0/64] 2023-07-04 16:42:09,188 (trainer:732) INFO: 11epoch:train:6801-6900batch: iter_time=1.029e-04, forward_time=0.143, loss_ctc=73.752, loss_att=61.099, acc=0.672, loss=64.895, backward_time=1.240, grad_norm=83.648, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.136e-04, train_time=3.228 +[gpub015:0/64] 2023-07-04 16:44:51,998 (trainer:732) INFO: 11epoch:train:6901-7000batch: 
iter_time=1.079e-04, forward_time=0.150, loss_ctc=74.119, loss_att=58.766, acc=0.679, loss=63.372, backward_time=1.244, grad_norm=117.258, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.177, optim0_lr0=1.135e-04, train_time=3.256 +[gpub015:0/64] 2023-07-04 16:47:37,040 (trainer:732) INFO: 11epoch:train:7001-7100batch: iter_time=1.079e-04, forward_time=0.144, loss_ctc=70.548, loss_att=54.290, acc=0.676, loss=59.167, backward_time=1.239, grad_norm=82.573, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.177, optim0_lr0=1.135e-04, train_time=3.301 +[gpub015:0/64] 2023-07-04 16:50:15,687 (trainer:732) INFO: 11epoch:train:7101-7200batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=77.779, loss_att=60.926, acc=0.663, loss=65.982, backward_time=1.237, grad_norm=82.395, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.177, optim0_lr0=1.134e-04, train_time=3.173 +[gpub015:0/64] 2023-07-04 16:52:52,296 (trainer:732) INFO: 11epoch:train:7201-7300batch: iter_time=1.123e-04, forward_time=0.141, loss_ctc=68.907, loss_att=54.263, acc=0.649, loss=58.656, backward_time=1.235, grad_norm=81.090, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.134e-04, train_time=3.132 +[gpub015:0/64] 2023-07-04 16:55:29,304 (trainer:732) INFO: 11epoch:train:7301-7400batch: iter_time=1.139e-04, forward_time=0.143, loss_ctc=73.895, loss_att=61.349, acc=0.678, loss=65.113, backward_time=1.238, grad_norm=86.829, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.133e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 16:58:06,207 (trainer:732) INFO: 11epoch:train:7401-7500batch: iter_time=1.001e-04, forward_time=0.142, loss_ctc=80.322, loss_att=63.802, acc=0.685, loss=68.758, backward_time=1.236, grad_norm=94.311, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.133e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 16:58:11,666 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub015:0/64] 2023-07-04 16:58:29,313 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 16:58:32,661 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 16:58:32,661 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub015:0/64] 2023-07-04 16:58:32,667 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 17:05:39,174 (trainer:732) INFO: 11epoch:train:7501-7600batch: iter_time=1.528, forward_time=0.144, loss_ctc=78.582, loss_att=59.317, acc=0.684, loss=65.097, backward_time=1.252, grad_norm=88.598, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.132e-04, train_time=9.059 +[gpub015:0/64] 2023-07-04 17:08:17,107 (trainer:732) INFO: 11epoch:train:7601-7700batch: iter_time=1.000e-04, forward_time=0.143, loss_ctc=69.421, loss_att=54.039, acc=0.703, loss=58.653, backward_time=1.241, grad_norm=91.904, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.131e-04, train_time=3.158 +[gpub015:0/64] 2023-07-04 17:10:54,037 (trainer:732) INFO: 11epoch:train:7701-7800batch: iter_time=1.044e-04, forward_time=0.142, loss_ctc=72.978, loss_att=61.569, acc=0.673, loss=64.992, backward_time=1.239, grad_norm=94.885, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.131e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 17:13:31,101 (trainer:732) INFO: 11epoch:train:7801-7900batch: iter_time=9.732e-05, forward_time=0.143, loss_ctc=75.174, loss_att=54.680, acc=0.694, loss=60.828, backward_time=1.239, grad_norm=80.141, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.130e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 17:16:07,930 (trainer:732) INFO: 11epoch:train:7901-8000batch: iter_time=1.003e-04, forward_time=0.143, loss_ctc=72.983, loss_att=56.893, acc=0.676, loss=61.720, backward_time=1.238, grad_norm=83.576, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.130e-04, train_time=3.136 +[gpub015:0/64] 2023-07-04 17:18:44,592 (trainer:732) INFO: 11epoch:train:8001-8100batch: iter_time=1.019e-04, forward_time=0.143, loss_ctc=71.290, loss_att=54.002, acc=0.666, loss=59.188, backward_time=1.237, grad_norm=104.543, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.129e-04, train_time=3.133 +[gpub015:0/64] 2023-07-04 17:21:21,553 (trainer:732) INFO: 11epoch:train:8101-8200batch: iter_time=1.072e-04, forward_time=0.143, loss_ctc=71.502, loss_att=61.136, acc=0.676, loss=64.245, backward_time=1.238, grad_norm=79.194, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.129e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 17:23:58,799 (trainer:732) INFO: 11epoch:train:8201-8300batch: iter_time=9.818e-05, forward_time=0.144, loss_ctc=81.675, loss_att=65.649, acc=0.699, loss=70.457, backward_time=1.241, grad_norm=95.853, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, 
optim0_lr0=1.128e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 17:24:53,534 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub015:0/64] 2023-07-04 17:25:11,166 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 17:25:14,530 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 17:25:14,530 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub015:0/64] 2023-07-04 17:25:14,536 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 17:29:58,326 (trainer:732) INFO: 11epoch:train:8301-8400batch: iter_time=1.239, forward_time=0.142, loss_ctc=77.508, loss_att=57.390, acc=0.690, loss=63.426, backward_time=1.251, grad_norm=84.822, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.127e-04, train_time=7.190 +[gpub015:0/64] 2023-07-04 17:32:39,866 (trainer:732) INFO: 11epoch:train:8401-8500batch: iter_time=1.145e-04, forward_time=0.143, loss_ctc=71.150, loss_att=51.139, acc=0.704, loss=57.143, backward_time=1.244, grad_norm=74.997, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.127e-04, train_time=3.231 +[gpub015:0/64] 2023-07-04 17:35:18,227 (trainer:732) INFO: 11epoch:train:8501-8600batch: iter_time=1.087e-04, forward_time=0.142, loss_ctc=70.281, loss_att=61.581, acc=0.677, loss=64.191, backward_time=1.238, grad_norm=88.779, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.126e-04, train_time=3.167 +[gpub015:0/64] 2023-07-04 17:38:03,061 (trainer:732) INFO: 11epoch:train:8601-8700batch: iter_time=0.002, forward_time=0.166, loss_ctc=74.328, loss_att=57.549, acc=0.693, loss=62.583, backward_time=1.255, grad_norm=120.714, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.126e-04, train_time=3.296 +[gpub015:0/64] 2023-07-04 17:40:40,597 (trainer:732) INFO: 11epoch:train:8701-8800batch: iter_time=1.146e-04, forward_time=0.144, loss_ctc=72.996, loss_att=53.756, acc=0.687, loss=59.528, backward_time=1.239, grad_norm=77.639, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.125e-04, train_time=3.150 +[gpub015:0/64] 2023-07-04 17:43:18,170 (trainer:732) INFO: 11epoch:train:8801-8900batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=72.990, loss_att=58.467, acc=0.657, loss=62.824, backward_time=1.238, grad_norm=79.134, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.183, optim0_lr0=1.125e-04, train_time=3.151 +[gpub015:0/64] 2023-07-04 17:46:06,875 (trainer:732) INFO: 11epoch:train:8901-9000batch: iter_time=1.090e-04, forward_time=0.179, loss_ctc=67.241, loss_att=54.706, acc=0.676, loss=58.467, backward_time=1.250, grad_norm=75.286, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.181, optim0_lr0=1.124e-04, train_time=3.374 +[gpub015:0/64] 2023-07-04 17:48:51,294 (trainer:732) INFO: 11epoch:train:9001-9100batch: iter_time=1.047e-04, forward_time=0.143, loss_ctc=75.541, loss_att=55.910, 
acc=0.706, loss=61.799, backward_time=1.246, grad_norm=79.268, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.123e-04, train_time=3.288 +[gpub015:0/64] 2023-07-04 17:50:45,067 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub015:0/64] 2023-07-04 17:51:02,822 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 17:51:06,236 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 17:51:06,236 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub015:0/64] 2023-07-04 17:51:06,242 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 17:55:34,613 (trainer:732) INFO: 11epoch:train:9101-9200batch: iter_time=1.493, forward_time=0.145, loss_ctc=84.206, loss_att=70.331, acc=0.687, loss=74.493, backward_time=1.252, grad_norm=92.347, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.123e-04, train_time=8.066 +[gpub015:0/64] 2023-07-04 17:58:14,578 (trainer:732) INFO: 11epoch:train:9201-9300batch: iter_time=1.225e-04, forward_time=0.144, loss_ctc=72.015, loss_att=51.510, acc=0.694, loss=57.662, backward_time=1.240, grad_norm=101.263, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.122e-04, train_time=3.199 +[gpub015:0/64] 2023-07-04 18:00:53,333 (trainer:732) INFO: 11epoch:train:9301-9400batch: iter_time=1.103e-04, forward_time=0.148, loss_ctc=71.528, loss_att=60.448, acc=0.694, loss=63.772, backward_time=1.241, grad_norm=82.338, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.122e-04, train_time=3.175 +[gpub015:0/64] 2023-07-04 18:03:34,243 (trainer:732) INFO: 11epoch:train:9401-9500batch: iter_time=1.096e-04, forward_time=0.152, loss_ctc=73.079, loss_att=58.605, acc=0.691, loss=62.947, backward_time=1.240, grad_norm=88.620, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.121e-04, train_time=3.218 +[gpub015:0/64] 2023-07-04 18:06:11,084 (trainer:732) INFO: 11epoch:train:9501-9600batch: iter_time=1.217e-04, forward_time=0.144, loss_ctc=69.793, loss_att=52.272, acc=0.687, loss=57.529, backward_time=1.237, grad_norm=86.909, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.121e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 18:08:48,008 (trainer:732) INFO: 11epoch:train:9601-9700batch: iter_time=1.100e-04, forward_time=0.143, loss_ctc=77.129, loss_att=60.450, acc=0.676, loss=65.454, backward_time=1.238, grad_norm=89.294, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.120e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 18:11:24,516 (trainer:732) INFO: 11epoch:train:9701-9800batch: iter_time=1.087e-04, forward_time=0.142, loss_ctc=66.975, loss_att=52.929, acc=0.666, loss=57.143, backward_time=1.236, grad_norm=82.429, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.119e-04, train_time=3.130 +[gpub015:0/64] 2023-07-04 18:14:01,398 
(trainer:732) INFO: 11epoch:train:9801-9900batch: iter_time=1.081e-04, forward_time=0.143, loss_ctc=74.627, loss_att=61.438, acc=0.686, loss=65.395, backward_time=1.237, grad_norm=80.146, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.119e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 18:16:38,364 (trainer:732) INFO: 11epoch:train:9901-10000batch: iter_time=1.095e-04, forward_time=0.144, loss_ctc=80.252, loss_att=63.991, acc=0.691, loss=68.869, backward_time=1.238, grad_norm=89.399, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.118e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 18:29:09,297 (trainer:338) INFO: 11epoch results: [train] iter_time=0.153, forward_time=0.145, loss_ctc=74.807, loss_att=58.923, acc=0.676, loss=63.688, backward_time=1.240, grad_norm=88.098, clip=100.000, loss_scale=7.147e+11, optim_step_time=0.178, optim0_lr0=1.147e-04, train_time=3.683, time=5 hours, 7 minutes and 6.9 seconds, total_count=80000, gpu_max_cached_mem_GB=33.838, [valid] loss_ctc=61.632, cer_ctc=0.324, loss_att=49.266, acc=0.614, cer=0.445, wer=1.000, loss=52.976, time=6 minutes and 24.4 seconds, total_count=8602, gpu_max_cached_mem_GB=37.133, [att_plot] time=5 minutes and 54.75 seconds, total_count=0, gpu_max_cached_mem_GB=37.133 +[gpub015:0/64] 2023-07-04 18:29:26,715 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub015:0/64] 2023-07-04 18:29:26,720 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/6epoch.pth +[gpub015:0/64] 2023-07-04 18:29:26,777 (trainer:272) INFO: 12/100epoch started. Estimated time to finish: 2 weeks, 5 days and 18 hours +[gpub015:0/64] 2023-07-04 18:29:28,074 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub015:0/64] 2023-07-04 18:29:45,861 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 18:29:49,136 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 18:29:49,136 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub015:0/64] 2023-07-04 18:29:49,212 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 18:35:37,004 (trainer:732) INFO: 12epoch:train:1-100batch: iter_time=2.064, forward_time=0.171, loss_ctc=84.536, loss_att=72.238, acc=0.649, loss=75.927, backward_time=1.250, grad_norm=121.129, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.118e-04, train_time=7.389 +[gpub015:0/64] 2023-07-04 18:38:17,593 (trainer:732) INFO: 12epoch:train:101-200batch: iter_time=1.095e-04, forward_time=0.144, loss_ctc=73.120, loss_att=54.091, acc=0.688, loss=59.800, backward_time=1.248, grad_norm=107.383, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.117e-04, train_time=3.212 +[gpub015:0/64] 2023-07-04 18:41:05,153 (trainer:732) INFO: 12epoch:train:201-300batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=70.845, loss_att=60.210, acc=0.659, loss=63.401, backward_time=1.249, grad_norm=88.294, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.117e-04, train_time=3.351 +[gpub015:0/64] 2023-07-04 18:43:44,855 (trainer:732) INFO: 12epoch:train:301-400batch: iter_time=1.144e-04, forward_time=0.144, loss_ctc=85.017, loss_att=69.739, acc=0.635, loss=74.322, backward_time=1.244, grad_norm=98.262, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.116e-04, train_time=3.194 +[gpub015:0/64] 2023-07-04 18:46:25,802 (trainer:732) INFO: 12epoch:train:401-500batch: iter_time=1.111e-04, forward_time=0.144, loss_ctc=74.240, loss_att=58.589, acc=0.662, loss=63.284, backward_time=1.244, grad_norm=86.501, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.116e-04, train_time=3.219 +[gpub015:0/64] 2023-07-04 18:49:05,301 (trainer:732) INFO: 12epoch:train:501-600batch: iter_time=1.124e-04, forward_time=0.145, loss_ctc=76.668, loss_att=63.744, acc=0.656, loss=67.622, backward_time=1.245, grad_norm=100.634, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.115e-04, train_time=3.190 +[gpub015:0/64] 2023-07-04 18:51:45,925 (trainer:732) INFO: 12epoch:train:601-700batch: iter_time=1.142e-04, forward_time=0.144, loss_ctc=81.493, loss_att=63.434, acc=0.662, loss=68.852, backward_time=1.243, grad_norm=121.231, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.114e-04, train_time=3.212 +[gpub015:0/64] 2023-07-04 18:54:26,636 (trainer:732) INFO: 12epoch:train:701-800batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=80.088, loss_att=59.983, acc=0.669, loss=66.014, backward_time=1.245, grad_norm=96.921, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.114e-04, train_time=3.214 
+[gpub015:0/64] 2023-07-04 18:55:23,266 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub015:0/64] 2023-07-04 18:55:40,570 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 18:55:43,896 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 18:55:43,896 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub015:0/64] 2023-07-04 18:55:43,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 18:59:47,620 (trainer:732) INFO: 12epoch:train:801-900batch: iter_time=1.289, forward_time=0.166, loss_ctc=86.833, loss_att=71.102, acc=0.669, loss=75.821, backward_time=1.252, grad_norm=107.388, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.113e-04, train_time=6.419 +[gpub015:0/64] 2023-07-04 19:02:25,633 (trainer:732) INFO: 12epoch:train:901-1000batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=74.625, loss_att=53.239, acc=0.692, loss=59.655, backward_time=1.243, grad_norm=84.263, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.113e-04, train_time=3.160 +[gpub015:0/64] 2023-07-04 19:05:02,569 (trainer:732) INFO: 12epoch:train:1001-1100batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=62.727, loss_att=49.968, acc=0.684, loss=53.796, backward_time=1.239, grad_norm=147.092, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.112e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 19:07:40,033 (trainer:732) INFO: 12epoch:train:1101-1200batch: iter_time=1.236e-04, forward_time=0.145, loss_ctc=85.676, loss_att=74.606, acc=0.647, loss=77.927, backward_time=1.242, grad_norm=97.002, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.112e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 19:10:17,138 (trainer:732) INFO: 12epoch:train:1201-1300batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=75.522, loss_att=58.402, acc=0.663, loss=63.538, backward_time=1.241, grad_norm=92.267, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.111e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 19:12:54,130 (trainer:732) INFO: 12epoch:train:1301-1400batch: iter_time=1.219e-04, forward_time=0.144, loss_ctc=72.477, loss_att=61.256, acc=0.664, loss=64.622, backward_time=1.241, grad_norm=85.313, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.111e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 19:15:31,210 (trainer:732) INFO: 12epoch:train:1401-1500batch: iter_time=1.172e-04, forward_time=0.144, loss_ctc=81.927, loss_att=61.508, acc=0.682, loss=67.634, backward_time=1.242, grad_norm=94.138, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.110e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 19:18:08,327 (trainer:732) INFO: 12epoch:train:1501-1600batch: iter_time=1.168e-04, forward_time=0.144, loss_ctc=79.107, loss_att=62.684, acc=0.673, loss=67.611, 
backward_time=1.242, grad_norm=109.704, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.109e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 19:20:01,909 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub015:0/64] 2023-07-04 19:20:19,627 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 19:20:23,028 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 19:20:23,028 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub015:0/64] 2023-07-04 19:20:23,034 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 19:23:59,707 (trainer:732) INFO: 12epoch:train:1601-1700batch: iter_time=1.736, forward_time=0.146, loss_ctc=86.653, loss_att=68.901, acc=0.661, loss=74.226, backward_time=1.254, grad_norm=89.859, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.109e-04, train_time=7.027 +[gpub015:0/64] 2023-07-04 19:26:37,403 (trainer:732) INFO: 12epoch:train:1701-1800batch: iter_time=1.135e-04, forward_time=0.147, loss_ctc=77.426, loss_att=60.332, acc=0.688, loss=65.461, backward_time=1.245, grad_norm=85.067, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.108e-04, train_time=3.154 +[gpub015:0/64] 2023-07-04 19:29:14,527 (trainer:732) INFO: 12epoch:train:1801-1900batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=64.854, loss_att=49.130, acc=0.702, loss=53.847, backward_time=1.242, grad_norm=88.422, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.108e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 19:31:51,842 (trainer:732) INFO: 12epoch:train:1901-2000batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=74.672, loss_att=64.816, acc=0.661, loss=67.773, backward_time=1.243, grad_norm=86.333, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.107e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 19:34:29,137 (trainer:732) INFO: 12epoch:train:2001-2100batch: iter_time=1.190e-04, forward_time=0.147, loss_ctc=84.626, loss_att=67.119, acc=0.658, loss=72.371, backward_time=1.243, grad_norm=95.382, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.107e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 19:37:06,442 (trainer:732) INFO: 12epoch:train:2101-2200batch: iter_time=1.147e-04, forward_time=0.147, loss_ctc=70.258, loss_att=56.040, acc=0.670, loss=60.306, backward_time=1.243, grad_norm=107.485, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.106e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 19:39:43,817 (trainer:732) INFO: 12epoch:train:2201-2300batch: iter_time=1.094e-04, forward_time=0.147, loss_ctc=74.006, loss_att=60.037, acc=0.680, loss=64.228, backward_time=1.243, grad_norm=107.315, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.106e-04, train_time=3.147 +[gpub015:0/64] 2023-07-04 19:42:21,229 (trainer:732) INFO: 
12epoch:train:2301-2400batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=86.224, loss_att=67.922, acc=0.683, loss=73.412, backward_time=1.243, grad_norm=104.049, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.105e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 19:44:58,641 (trainer:732) INFO: 12epoch:train:2401-2500batch: iter_time=1.103e-04, forward_time=0.148, loss_ctc=75.849, loss_att=55.993, acc=0.677, loss=61.950, backward_time=1.242, grad_norm=96.455, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.105e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 19:45:01,701 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub015:0/64] 2023-07-04 19:45:19,346 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 19:45:22,688 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 19:45:22,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub015:0/64] 2023-07-04 19:45:22,694 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 19:51:26,255 (trainer:732) INFO: 12epoch:train:2501-2600batch: iter_time=1.225, forward_time=0.175, loss_ctc=81.079, loss_att=68.853, acc=0.671, loss=72.521, backward_time=1.255, grad_norm=89.970, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.104e-04, train_time=7.751 +[gpub015:0/64] 2023-07-04 19:54:04,278 (trainer:732) INFO: 12epoch:train:2601-2700batch: iter_time=1.185e-04, forward_time=0.144, loss_ctc=71.385, loss_att=52.315, acc=0.699, loss=58.036, backward_time=1.244, grad_norm=77.476, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.103e-04, train_time=3.161 +[gpub015:0/64] 2023-07-04 19:56:41,442 (trainer:732) INFO: 12epoch:train:2701-2800batch: iter_time=1.104e-04, forward_time=0.144, loss_ctc=67.155, loss_att=55.880, acc=0.678, loss=59.262, backward_time=1.242, grad_norm=77.514, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.103e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 19:59:18,844 (trainer:732) INFO: 12epoch:train:2801-2900batch: iter_time=1.069e-04, forward_time=0.145, loss_ctc=83.629, loss_att=65.947, acc=0.656, loss=71.251, backward_time=1.242, grad_norm=88.064, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.102e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 20:01:55,894 (trainer:732) INFO: 12epoch:train:2901-3000batch: iter_time=1.108e-04, forward_time=0.144, loss_ctc=74.662, loss_att=58.929, acc=0.668, loss=63.649, backward_time=1.242, grad_norm=86.125, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.102e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 20:04:33,377 (trainer:732) INFO: 12epoch:train:3001-3100batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=74.376, loss_att=60.452, acc=0.678, loss=64.629, backward_time=1.244, grad_norm=89.490, clip=100.000, loss_scale=2.199e+12, 
optim_step_time=0.181, optim0_lr0=1.101e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 20:07:10,509 (trainer:732) INFO: 12epoch:train:3101-3200batch: iter_time=1.079e-04, forward_time=0.144, loss_ctc=77.433, loss_att=60.732, acc=0.681, loss=65.743, backward_time=1.242, grad_norm=105.819, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.101e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 20:09:47,663 (trainer:732) INFO: 12epoch:train:3201-3300batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=77.566, loss_att=60.190, acc=0.678, loss=65.403, backward_time=1.242, grad_norm=127.184, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.100e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 20:10:48,102 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub015:0/64] 2023-07-04 20:11:06,164 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 20:11:09,520 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 20:11:09,520 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub015:0/64] 2023-07-04 20:11:09,526 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 20:16:56,469 (trainer:732) INFO: 12epoch:train:3301-3400batch: iter_time=1.721, forward_time=0.145, loss_ctc=81.802, loss_att=68.440, acc=0.662, loss=72.449, backward_time=1.251, grad_norm=120.608, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.100e-04, train_time=8.576 +[gpub015:0/64] 2023-07-04 20:19:33,743 (trainer:732) INFO: 12epoch:train:3401-3500batch: iter_time=1.214e-04, forward_time=0.144, loss_ctc=76.097, loss_att=57.732, acc=0.683, loss=63.241, backward_time=1.240, grad_norm=86.251, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.099e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 20:22:10,967 (trainer:732) INFO: 12epoch:train:3501-3600batch: iter_time=1.174e-04, forward_time=0.144, loss_ctc=62.531, loss_att=47.624, acc=0.692, loss=52.096, backward_time=1.240, grad_norm=76.543, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.099e-04, train_time=3.144 +[gpub015:0/64] 2023-07-04 20:24:48,258 (trainer:732) INFO: 12epoch:train:3601-3700batch: iter_time=1.147e-04, forward_time=0.145, loss_ctc=82.753, loss_att=73.085, acc=0.642, loss=75.986, backward_time=1.242, grad_norm=93.445, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.098e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 20:27:25,282 (trainer:732) INFO: 12epoch:train:3701-3800batch: iter_time=1.122e-04, forward_time=0.144, loss_ctc=79.961, loss_att=63.845, acc=0.651, loss=68.680, backward_time=1.241, grad_norm=94.879, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.098e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 20:30:02,221 (trainer:732) INFO: 12epoch:train:3801-3900batch: iter_time=1.218e-04, forward_time=0.145, 
loss_ctc=70.155, loss_att=59.692, acc=0.663, loss=62.831, backward_time=1.241, grad_norm=81.352, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.097e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 20:32:39,167 (trainer:732) INFO: 12epoch:train:3901-4000batch: iter_time=1.407e-04, forward_time=0.144, loss_ctc=80.943, loss_att=62.078, acc=0.682, loss=67.738, backward_time=1.242, grad_norm=92.320, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.097e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 20:35:16,227 (trainer:732) INFO: 12epoch:train:4001-4100batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=76.768, loss_att=59.276, acc=0.678, loss=64.524, backward_time=1.241, grad_norm=102.125, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.096e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 20:37:02,841 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub015:0/64] 2023-07-04 20:37:20,845 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 20:37:24,221 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 20:37:24,221 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub015:0/64] 2023-07-04 20:37:24,227 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 20:41:28,761 (trainer:732) INFO: 12epoch:train:4101-4200batch: iter_time=1.213, forward_time=0.145, loss_ctc=82.650, loss_att=63.715, acc=0.660, loss=69.395, backward_time=1.255, grad_norm=108.665, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.096e-04, train_time=7.450 +[gpub015:0/64] 2023-07-04 20:44:06,299 (trainer:732) INFO: 12epoch:train:4201-4300batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=77.528, loss_att=61.049, acc=0.680, loss=65.993, backward_time=1.243, grad_norm=102.226, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.095e-04, train_time=3.151 +[gpub015:0/64] 2023-07-04 20:46:43,366 (trainer:732) INFO: 12epoch:train:4301-4400batch: iter_time=1.237e-04, forward_time=0.144, loss_ctc=63.974, loss_att=49.928, acc=0.703, loss=54.141, backward_time=1.241, grad_norm=85.946, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.094e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 20:49:20,761 (trainer:732) INFO: 12epoch:train:4401-4500batch: iter_time=1.296e-04, forward_time=0.145, loss_ctc=73.590, loss_att=63.027, acc=0.653, loss=66.196, backward_time=1.243, grad_norm=98.824, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.094e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 20:51:57,869 (trainer:732) INFO: 12epoch:train:4501-4600batch: iter_time=1.230e-04, forward_time=0.143, loss_ctc=86.089, loss_att=68.894, acc=0.647, loss=74.053, backward_time=1.241, grad_norm=98.658, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.093e-04, train_time=3.142 
+[gpub015:0/64] 2023-07-04 20:54:34,865 (trainer:732) INFO: 12epoch:train:4601-4700batch: iter_time=1.307e-04, forward_time=0.144, loss_ctc=69.746, loss_att=61.107, acc=0.658, loss=63.699, backward_time=1.241, grad_norm=101.548, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.093e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 20:57:11,892 (trainer:732) INFO: 12epoch:train:4701-4800batch: iter_time=1.379e-04, forward_time=0.144, loss_ctc=72.222, loss_att=59.132, acc=0.676, loss=63.059, backward_time=1.240, grad_norm=95.294, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.092e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 20:59:49,193 (trainer:732) INFO: 12epoch:train:4801-4900batch: iter_time=1.299e-04, forward_time=0.144, loss_ctc=84.221, loss_att=67.911, acc=0.672, loss=72.804, backward_time=1.242, grad_norm=97.856, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.092e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 21:02:26,288 (trainer:732) INFO: 12epoch:train:4901-5000batch: iter_time=1.076e-04, forward_time=0.145, loss_ctc=77.713, loss_att=56.351, acc=0.676, loss=62.760, backward_time=1.241, grad_norm=93.021, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.091e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 21:02:29,142 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub015:0/64] 2023-07-04 21:02:47,188 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 21:02:50,583 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 21:02:50,583 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub015:0/64] 2023-07-04 21:02:50,589 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 21:09:23,316 (trainer:732) INFO: 12epoch:train:5001-5100batch: iter_time=1.207, forward_time=0.146, loss_ctc=82.085, loss_att=70.491, acc=0.667, loss=73.969, backward_time=1.255, grad_norm=101.219, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.091e-04, train_time=8.340 +[gpub015:0/64] 2023-07-04 21:12:00,519 (trainer:732) INFO: 12epoch:train:5101-5200batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=70.631, loss_att=52.245, acc=0.702, loss=57.761, backward_time=1.241, grad_norm=82.535, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.090e-04, train_time=3.144 +[gpub015:0/64] 2023-07-04 21:14:37,398 (trainer:732) INFO: 12epoch:train:5201-5300batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=69.574, loss_att=59.873, acc=0.662, loss=62.783, backward_time=1.240, grad_norm=83.957, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.090e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 21:17:14,881 (trainer:732) INFO: 12epoch:train:5301-5400batch: iter_time=9.520e-05, forward_time=0.145, loss_ctc=80.138, loss_att=66.006, acc=0.645, loss=70.246, 
backward_time=1.242, grad_norm=92.850, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.089e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 21:19:51,908 (trainer:732) INFO: 12epoch:train:5401-5500batch: iter_time=9.896e-05, forward_time=0.144, loss_ctc=71.912, loss_att=58.300, acc=0.672, loss=62.384, backward_time=1.240, grad_norm=92.965, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.089e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 21:22:28,899 (trainer:732) INFO: 12epoch:train:5501-5600batch: iter_time=9.511e-05, forward_time=0.144, loss_ctc=74.006, loss_att=59.170, acc=0.672, loss=63.621, backward_time=1.241, grad_norm=83.684, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.088e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 21:25:05,777 (trainer:732) INFO: 12epoch:train:5601-5700batch: iter_time=9.316e-05, forward_time=0.143, loss_ctc=77.242, loss_att=61.429, acc=0.675, loss=66.173, backward_time=1.240, grad_norm=96.974, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.088e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 21:27:42,708 (trainer:732) INFO: 12epoch:train:5701-5800batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=77.205, loss_att=58.937, acc=0.675, loss=64.417, backward_time=1.240, grad_norm=105.464, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.087e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 21:28:38,964 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub015:0/64] 2023-07-04 21:28:56,893 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 21:29:00,277 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 21:29:00,278 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub015:0/64] 2023-07-04 21:29:00,284 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 21:33:26,178 (trainer:732) INFO: 12epoch:train:5801-5900batch: iter_time=1.191, forward_time=0.146, loss_ctc=82.820, loss_att=69.559, acc=0.671, loss=73.537, backward_time=1.262, grad_norm=95.044, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.087e-04, train_time=6.869 +[gpub015:0/64] 2023-07-04 21:36:03,771 (trainer:732) INFO: 12epoch:train:5901-6000batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=75.434, loss_att=55.881, acc=0.695, loss=61.747, backward_time=1.242, grad_norm=85.721, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.086e-04, train_time=3.152 +[gpub015:0/64] 2023-07-04 21:38:40,878 (trainer:732) INFO: 12epoch:train:6001-6100batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=61.211, loss_att=46.498, acc=0.698, loss=50.912, backward_time=1.241, grad_norm=79.707, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.086e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 21:41:18,354 (trainer:732) INFO: 
12epoch:train:6101-6200batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=81.359, loss_att=70.422, acc=0.664, loss=73.703, backward_time=1.243, grad_norm=100.635, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.085e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 21:43:55,529 (trainer:732) INFO: 12epoch:train:6201-6300batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=77.317, loss_att=64.201, acc=0.663, loss=68.136, backward_time=1.242, grad_norm=85.881, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.085e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 21:46:32,894 (trainer:732) INFO: 12epoch:train:6301-6400batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=69.240, loss_att=57.327, acc=0.672, loss=60.901, backward_time=1.241, grad_norm=90.918, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.084e-04, train_time=3.147 +[gpub015:0/64] 2023-07-04 21:49:10,067 (trainer:732) INFO: 12epoch:train:6401-6500batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=80.419, loss_att=61.954, acc=0.689, loss=67.493, backward_time=1.242, grad_norm=100.821, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.084e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 21:51:47,235 (trainer:732) INFO: 12epoch:train:6501-6600batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=76.050, loss_att=60.487, acc=0.687, loss=65.156, backward_time=1.242, grad_norm=88.698, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.083e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 21:53:35,633 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub015:0/64] 2023-07-04 21:53:54,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 21:53:57,417 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 21:53:57,417 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub015:0/64] 2023-07-04 21:53:57,423 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 21:59:20,328 (trainer:732) INFO: 12epoch:train:6601-6700batch: iter_time=1.193, forward_time=0.147, loss_ctc=82.390, loss_att=62.650, acc=0.665, loss=68.572, backward_time=1.253, grad_norm=98.318, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.083e-04, train_time=9.062 +[gpub015:0/64] 2023-07-04 22:01:58,471 (trainer:732) INFO: 12epoch:train:6701-6800batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=76.609, loss_att=61.085, acc=0.679, loss=65.742, backward_time=1.244, grad_norm=106.235, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.082e-04, train_time=3.163 +[gpub015:0/64] 2023-07-04 22:04:35,355 (trainer:732) INFO: 12epoch:train:6801-6900batch: iter_time=1.242e-04, forward_time=0.143, loss_ctc=64.435, loss_att=50.254, acc=0.706, loss=54.508, backward_time=1.240, grad_norm=81.368, clip=100.000, loss_scale=4.398e+12, 
optim_step_time=0.182, optim0_lr0=1.082e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 22:07:12,751 (trainer:732) INFO: 12epoch:train:6901-7000batch: iter_time=1.163e-04, forward_time=0.144, loss_ctc=70.848, loss_att=61.231, acc=0.665, loss=64.116, backward_time=1.241, grad_norm=86.145, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.081e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 22:09:49,941 (trainer:732) INFO: 12epoch:train:7001-7100batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=83.009, loss_att=68.382, acc=0.652, loss=72.770, backward_time=1.243, grad_norm=95.705, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.081e-04, train_time=3.144 +[gpub015:0/64] 2023-07-04 22:12:26,941 (trainer:732) INFO: 12epoch:train:7101-7200batch: iter_time=9.946e-05, forward_time=0.144, loss_ctc=70.979, loss_att=56.996, acc=0.668, loss=61.190, backward_time=1.242, grad_norm=78.799, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.080e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 22:15:04,098 (trainer:732) INFO: 12epoch:train:7201-7300batch: iter_time=1.017e-04, forward_time=0.144, loss_ctc=70.377, loss_att=56.229, acc=0.680, loss=60.473, backward_time=1.242, grad_norm=84.014, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.080e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 22:17:41,598 (trainer:732) INFO: 12epoch:train:7301-7400batch: iter_time=9.492e-05, forward_time=0.144, loss_ctc=81.351, loss_att=66.341, acc=0.673, loss=70.844, backward_time=1.243, grad_norm=93.304, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.079e-04, train_time=3.150 +[gpub015:0/64] 2023-07-04 22:20:18,910 (trainer:732) INFO: 12epoch:train:7401-7500batch: iter_time=1.063e-04, forward_time=0.144, loss_ctc=78.117, loss_att=55.626, acc=0.691, loss=62.374, backward_time=1.242, grad_norm=96.027, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.079e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 22:20:21,637 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub015:0/64] 2023-07-04 22:20:39,406 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 22:20:42,788 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 22:20:42,788 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub015:0/64] 2023-07-04 22:20:42,794 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 22:26:34,393 (trainer:732) INFO: 12epoch:train:7501-7600batch: iter_time=1.201, forward_time=0.146, loss_ctc=78.276, loss_att=65.547, acc=0.682, loss=69.366, backward_time=1.258, grad_norm=125.636, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.078e-04, train_time=7.509 +[gpub015:0/64] 2023-07-04 22:29:11,707 (trainer:732) INFO: 12epoch:train:7601-7700batch: iter_time=1.155e-04, forward_time=0.145, loss_ctc=69.956, loss_att=52.275, acc=0.704, loss=57.579, backward_time=1.241, grad_norm=86.535, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.078e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 22:31:48,758 (trainer:732) INFO: 12epoch:train:7701-7800batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=66.350, loss_att=56.272, acc=0.683, loss=59.295, backward_time=1.241, grad_norm=81.376, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.077e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 22:34:26,515 (trainer:732) INFO: 12epoch:train:7801-7900batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=81.444, loss_att=63.780, acc=0.666, loss=69.079, backward_time=1.243, grad_norm=82.021, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.077e-04, train_time=3.155 +[gpub015:0/64] 2023-07-04 22:37:03,817 (trainer:732) INFO: 12epoch:train:7901-8000batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=73.305, loss_att=58.553, acc=0.673, loss=62.979, backward_time=1.242, grad_norm=86.271, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.076e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 22:39:41,184 (trainer:732) INFO: 12epoch:train:8001-8100batch: iter_time=1.047e-04, forward_time=0.146, loss_ctc=72.842, loss_att=59.680, acc=0.680, loss=63.629, backward_time=1.243, grad_norm=83.633, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.076e-04, train_time=3.147 +[gpub015:0/64] 2023-07-04 22:42:18,296 (trainer:732) INFO: 12epoch:train:8101-8200batch: iter_time=1.052e-04, forward_time=0.145, loss_ctc=76.362, loss_att=60.168, acc=0.687, loss=65.026, backward_time=1.242, grad_norm=110.533, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.075e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 22:44:55,396 (trainer:732) INFO: 12epoch:train:8201-8300batch: iter_time=9.924e-05, forward_time=0.145, loss_ctc=76.992, loss_att=57.507, acc=0.689, loss=63.352, backward_time=1.242, grad_norm=87.809, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.075e-04, 
train_time=3.142 +[gpub015:0/64] 2023-07-04 22:45:50,214 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub015:0/64] 2023-07-04 22:46:08,597 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 22:46:12,246 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 22:46:12,247 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub015:0/64] 2023-07-04 22:46:12,253 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 22:51:21,478 (trainer:732) INFO: 12epoch:train:8301-8400batch: iter_time=1.205, forward_time=0.144, loss_ctc=80.365, loss_att=65.058, acc=0.674, loss=69.650, backward_time=1.253, grad_norm=106.325, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.074e-04, train_time=7.721 +[gpub015:0/64] 2023-07-04 22:53:59,133 (trainer:732) INFO: 12epoch:train:8401-8500batch: iter_time=1.152e-04, forward_time=0.145, loss_ctc=74.424, loss_att=55.889, acc=0.689, loss=61.450, backward_time=1.242, grad_norm=95.701, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.074e-04, train_time=3.153 +[gpub015:0/64] 2023-07-04 22:56:36,584 (trainer:732) INFO: 12epoch:train:8501-8600batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=61.774, loss_att=47.039, acc=0.701, loss=51.460, backward_time=1.242, grad_norm=81.038, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.073e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 22:59:13,839 (trainer:732) INFO: 12epoch:train:8601-8700batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=78.654, loss_att=69.938, acc=0.653, loss=72.552, backward_time=1.243, grad_norm=89.759, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.073e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 23:01:51,155 (trainer:732) INFO: 12epoch:train:8701-8800batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=77.945, loss_att=62.783, acc=0.660, loss=67.331, backward_time=1.241, grad_norm=83.799, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.072e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 23:04:28,142 (trainer:732) INFO: 12epoch:train:8801-8900batch: iter_time=1.221e-04, forward_time=0.144, loss_ctc=70.601, loss_att=58.052, acc=0.671, loss=61.817, backward_time=1.240, grad_norm=91.330, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.072e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 23:07:05,403 (trainer:732) INFO: 12epoch:train:8901-9000batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=80.327, loss_att=60.532, acc=0.690, loss=66.471, backward_time=1.242, grad_norm=96.076, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.071e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 23:09:42,434 (trainer:732) INFO: 12epoch:train:9001-9100batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=75.994, loss_att=58.252, acc=0.683, loss=63.575, 
backward_time=1.241, grad_norm=91.538, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.071e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 23:11:28,947 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub015:0/64] 2023-07-04 23:11:46,760 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 23:11:50,122 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 23:11:50,122 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub015:0/64] 2023-07-04 23:11:50,128 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 23:16:39,903 (trainer:732) INFO: 12epoch:train:9101-9200batch: iter_time=1.216, forward_time=0.145, loss_ctc=81.962, loss_att=63.013, acc=0.674, loss=68.697, backward_time=1.255, grad_norm=96.644, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.070e-04, train_time=8.349 +[gpub015:0/64] 2023-07-04 23:19:18,238 (trainer:732) INFO: 12epoch:train:9201-9300batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=76.818, loss_att=60.914, acc=0.689, loss=65.685, backward_time=1.244, grad_norm=97.394, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.070e-04, train_time=3.166 +[gpub015:0/64] 2023-07-04 23:21:55,891 (trainer:732) INFO: 12epoch:train:9301-9400batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.491, loss_att=48.491, acc=0.714, loss=53.291, backward_time=1.242, grad_norm=103.178, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.069e-04, train_time=3.153 +[gpub015:0/64] 2023-07-04 23:24:33,316 (trainer:732) INFO: 12epoch:train:9401-9500batch: iter_time=1.123e-04, forward_time=0.146, loss_ctc=71.079, loss_att=60.571, acc=0.676, loss=63.723, backward_time=1.242, grad_norm=96.268, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.069e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 23:27:10,748 (trainer:732) INFO: 12epoch:train:9501-9600batch: iter_time=1.076e-04, forward_time=0.146, loss_ctc=83.740, loss_att=65.360, acc=0.670, loss=70.874, backward_time=1.243, grad_norm=101.596, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.068e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 23:29:48,320 (trainer:732) INFO: 12epoch:train:9601-9700batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=68.537, loss_att=55.883, acc=0.678, loss=59.679, backward_time=1.243, grad_norm=91.222, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.068e-04, train_time=3.151 +[gpub015:0/64] 2023-07-04 23:32:25,785 (trainer:732) INFO: 12epoch:train:9701-9800batch: iter_time=1.094e-04, forward_time=0.146, loss_ctc=71.251, loss_att=56.795, acc=0.683, loss=61.132, backward_time=1.242, grad_norm=95.223, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.067e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 23:35:03,217 (trainer:732) INFO: 
12epoch:train:9801-9900batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=82.689, loss_att=66.938, acc=0.684, loss=71.663, backward_time=1.243, grad_norm=92.914, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.067e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 23:37:40,257 (trainer:732) INFO: 12epoch:train:9901-10000batch: iter_time=1.088e-04, forward_time=0.145, loss_ctc=76.321, loss_att=55.700, acc=0.689, loss=61.887, backward_time=1.240, grad_norm=106.101, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.066e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 23:50:19,270 (trainer:338) INFO: 12epoch results: [train] iter_time=0.165, forward_time=0.146, loss_ctc=76.028, loss_att=60.678, acc=0.674, loss=65.283, backward_time=1.244, grad_norm=95.160, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.091e-04, train_time=3.698, time=5 hours, 8 minutes and 28.51 seconds, total_count=90000, gpu_max_cached_mem_GB=37.139, [valid] loss_ctc=56.126, cer_ctc=0.306, loss_att=45.911, acc=0.623, cer=0.448, wer=0.996, loss=48.975, time=6 minutes and 37.11 seconds, total_count=9614, gpu_max_cached_mem_GB=37.139, [att_plot] time=5 minutes and 46.68 seconds, total_count=0, gpu_max_cached_mem_GB=37.139 +[gpub015:0/64] 2023-07-04 23:50:34,276 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub015:0/64] 2023-07-04 23:50:34,316 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/7epoch.pth +[gpub015:0/64] 2023-07-04 23:50:34,317 (trainer:272) INFO: 13/100epoch started. Estimated time to finish: 2 weeks, 5 days and 13 hours +[gpub015:0/64] 2023-07-04 23:50:34,320 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub015:0/64] 2023-07-04 23:50:51,721 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 23:50:55,289 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 23:50:55,289 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub015:0/64] 2023-07-04 23:50:55,296 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 23:55:20,069 (trainer:732) INFO: 13epoch:train:1-100batch: iter_time=1.207, forward_time=0.145, loss_ctc=85.987, loss_att=70.558, acc=0.640, loss=75.187, backward_time=1.257, grad_norm=124.267, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.066e-04, train_time=5.715 +[gpub015:0/64] 2023-07-04 23:58:04,071 (trainer:732) INFO: 13epoch:train:101-200batch: iter_time=0.006, forward_time=0.184, loss_ctc=73.151, loss_att=51.368, acc=0.686, loss=57.903, backward_time=1.255, grad_norm=88.292, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.182, optim0_lr0=1.065e-04, train_time=3.280 +[gpub015:0/64] 2023-07-05 00:00:49,379 (trainer:732) INFO: 13epoch:train:201-300batch: iter_time=1.286e-04, forward_time=0.174, loss_ctc=83.595, loss_att=65.113, acc=0.656, loss=70.658, backward_time=1.252, grad_norm=113.517, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.183, optim0_lr0=1.065e-04, train_time=3.306 +[gpub015:0/64] 2023-07-05 00:03:30,808 (trainer:732) INFO: 13epoch:train:301-400batch: iter_time=1.342e-04, forward_time=0.152, loss_ctc=84.466, loss_att=68.319, acc=0.661, loss=73.163, backward_time=1.243, grad_norm=105.019, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.182, optim0_lr0=1.064e-04, train_time=3.228 +[gpub015:0/64] 2023-07-05 00:06:08,085 (trainer:732) INFO: 13epoch:train:401-500batch: iter_time=1.279e-04, forward_time=0.144, loss_ctc=88.619, loss_att=76.517, acc=0.651, loss=80.147, backward_time=1.243, grad_norm=109.872, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.180, optim0_lr0=1.064e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 00:08:48,273 (trainer:732) INFO: 13epoch:train:501-600batch: iter_time=1.310e-04, forward_time=0.144, loss_ctc=78.073, loss_att=59.874, acc=0.665, loss=65.334, backward_time=1.243, grad_norm=157.357, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.063e-04, train_time=3.204 +[gpub015:0/64] 2023-07-05 00:11:32,992 (trainer:732) INFO: 13epoch:train:601-700batch: iter_time=1.264e-04, forward_time=0.143, loss_ctc=77.084, loss_att=60.101, acc=0.662, loss=65.196, backward_time=1.245, grad_norm=98.957, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.180, optim0_lr0=1.063e-04, train_time=3.294 +[gpub015:0/64] 2023-07-05 00:14:10,177 (trainer:732) INFO: 13epoch:train:701-800batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=72.641, loss_att=55.254, acc=0.691, loss=60.470, backward_time=1.241, grad_norm=97.430, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.062e-04, train_time=3.143 
+[gpub015:0/64] 2023-07-05 00:15:19,090 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub015:0/64] 2023-07-05 00:15:36,248 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 00:15:39,840 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 00:15:39,840 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub015:0/64] 2023-07-05 00:15:39,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 00:21:58,716 (trainer:732) INFO: 13epoch:train:801-900batch: iter_time=1.834, forward_time=0.144, loss_ctc=75.899, loss_att=62.053, acc=0.682, loss=66.207, backward_time=1.249, grad_norm=96.235, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.062e-04, train_time=9.371 +[gpub015:0/64] 2023-07-05 00:24:36,279 (trainer:732) INFO: 13epoch:train:901-1000batch: iter_time=1.032e-04, forward_time=0.144, loss_ctc=78.048, loss_att=61.186, acc=0.667, loss=66.245, backward_time=1.242, grad_norm=122.905, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.061e-04, train_time=3.151 +[gpub015:0/64] 2023-07-05 00:27:13,670 (trainer:732) INFO: 13epoch:train:1001-1100batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=76.879, loss_att=56.861, acc=0.686, loss=62.866, backward_time=1.242, grad_norm=98.500, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.061e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 00:29:50,859 (trainer:732) INFO: 13epoch:train:1101-1200batch: iter_time=1.162e-04, forward_time=0.145, loss_ctc=85.818, loss_att=67.227, acc=0.673, loss=72.804, backward_time=1.243, grad_norm=118.579, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.060e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 00:32:28,234 (trainer:732) INFO: 13epoch:train:1201-1300batch: iter_time=1.191e-04, forward_time=0.146, loss_ctc=85.561, loss_att=71.717, acc=0.666, loss=75.871, backward_time=1.245, grad_norm=103.403, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.060e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 00:35:05,850 (trainer:732) INFO: 13epoch:train:1301-1400batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=85.789, loss_att=70.197, acc=0.673, loss=74.875, backward_time=1.245, grad_norm=113.813, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.060e-04, train_time=3.152 +[gpub015:0/64] 2023-07-05 00:37:42,827 (trainer:732) INFO: 13epoch:train:1401-1500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=76.540, loss_att=55.910, acc=0.667, loss=62.099, backward_time=1.240, grad_norm=91.779, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.059e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 00:40:20,163 (trainer:732) INFO: 13epoch:train:1501-1600batch: iter_time=1.059e-04, forward_time=0.145, loss_ctc=74.583, loss_att=60.149, acc=0.685, loss=64.479, 
backward_time=1.243, grad_norm=88.474, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.059e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 00:42:12,133 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub015:0/64] 2023-07-05 00:42:30,034 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 00:42:33,425 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 00:42:33,425 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub015:0/64] 2023-07-05 00:42:33,463 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 00:46:46,084 (trainer:732) INFO: 13epoch:train:1601-1700batch: iter_time=1.791, forward_time=0.145, loss_ctc=74.553, loss_att=58.445, acc=0.700, loss=63.277, backward_time=1.251, grad_norm=111.557, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.058e-04, train_time=7.718 +[gpub015:0/64] 2023-07-05 00:49:24,208 (trainer:732) INFO: 13epoch:train:1701-1800batch: iter_time=1.166e-04, forward_time=0.145, loss_ctc=75.927, loss_att=60.423, acc=0.666, loss=65.074, backward_time=1.244, grad_norm=134.424, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.058e-04, train_time=3.162 +[gpub015:0/64] 2023-07-05 00:52:01,356 (trainer:732) INFO: 13epoch:train:1801-1900batch: iter_time=1.104e-04, forward_time=0.144, loss_ctc=77.363, loss_att=57.785, acc=0.684, loss=63.658, backward_time=1.242, grad_norm=94.176, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.057e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 00:54:38,891 (trainer:732) INFO: 13epoch:train:1901-2000batch: iter_time=1.082e-04, forward_time=0.145, loss_ctc=78.779, loss_att=62.722, acc=0.687, loss=67.539, backward_time=1.243, grad_norm=96.279, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.057e-04, train_time=3.150 +[gpub015:0/64] 2023-07-05 00:57:18,535 (trainer:732) INFO: 13epoch:train:2001-2100batch: iter_time=1.124e-04, forward_time=0.145, loss_ctc=83.142, loss_att=62.800, acc=0.676, loss=68.903, backward_time=1.244, grad_norm=93.127, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.056e-04, train_time=3.193 +[gpub015:0/64] 2023-07-05 01:00:10,027 (trainer:732) INFO: 13epoch:train:2101-2200batch: iter_time=1.114e-04, forward_time=0.145, loss_ctc=87.896, loss_att=72.933, acc=0.670, loss=77.422, backward_time=1.296, grad_norm=109.444, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.056e-04, train_time=3.430 +[gpub015:0/64] 2023-07-05 01:02:47,356 (trainer:732) INFO: 13epoch:train:2201-2300batch: iter_time=1.144e-04, forward_time=0.144, loss_ctc=76.474, loss_att=59.030, acc=0.674, loss=64.263, backward_time=1.243, grad_norm=92.859, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.055e-04, train_time=3.146 +[gpub015:0/64] 2023-07-05 01:05:27,710 (trainer:732) INFO: 
13epoch:train:2301-2400batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=76.529, loss_att=59.750, acc=0.682, loss=64.784, backward_time=1.244, grad_norm=89.961, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.055e-04, train_time=3.207 +[gpub015:0/64] 2023-07-05 01:08:04,714 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub015:0/64] 2023-07-05 01:08:22,388 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 01:08:25,735 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 01:08:25,735 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub015:0/64] 2023-07-05 01:08:25,742 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 01:13:37,098 (trainer:732) INFO: 13epoch:train:2401-2500batch: iter_time=1.192, forward_time=0.144, loss_ctc=70.399, loss_att=53.578, acc=0.702, loss=58.624, backward_time=1.244, grad_norm=79.956, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.054e-04, train_time=9.788 +[gpub015:0/64] 2023-07-05 01:16:16,640 (trainer:732) INFO: 13epoch:train:2501-2600batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=77.710, loss_att=67.795, acc=0.656, loss=70.770, backward_time=1.249, grad_norm=156.311, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.054e-04, train_time=3.191 +[gpub015:0/64] 2023-07-05 01:18:53,489 (trainer:732) INFO: 13epoch:train:2601-2700batch: iter_time=1.184e-04, forward_time=0.144, loss_ctc=71.797, loss_att=51.023, acc=0.687, loss=57.255, backward_time=1.240, grad_norm=156.086, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.053e-04, train_time=3.137 +[gpub015:0/64] 2023-07-05 01:21:30,516 (trainer:732) INFO: 13epoch:train:2701-2800batch: iter_time=1.030e-04, forward_time=0.144, loss_ctc=78.373, loss_att=62.065, acc=0.669, loss=66.957, backward_time=1.241, grad_norm=95.962, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.053e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 01:24:07,692 (trainer:732) INFO: 13epoch:train:2801-2900batch: iter_time=9.315e-05, forward_time=0.143, loss_ctc=83.675, loss_att=66.663, acc=0.670, loss=71.766, backward_time=1.242, grad_norm=96.929, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.052e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 01:26:45,090 (trainer:732) INFO: 13epoch:train:2901-3000batch: iter_time=1.043e-04, forward_time=0.144, loss_ctc=86.928, loss_att=75.665, acc=0.659, loss=79.044, backward_time=1.243, grad_norm=119.641, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.052e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 01:29:22,331 (trainer:732) INFO: 13epoch:train:3001-3100batch: iter_time=9.929e-05, forward_time=0.144, loss_ctc=74.752, loss_att=55.415, acc=0.672, loss=61.216, backward_time=1.242, grad_norm=108.366, clip=100.000, loss_scale=1.759e+13, 
optim_step_time=0.181, optim0_lr0=1.052e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 01:31:59,171 (trainer:732) INFO: 13epoch:train:3101-3200batch: iter_time=9.663e-05, forward_time=0.143, loss_ctc=75.329, loss_att=58.040, acc=0.669, loss=63.226, backward_time=1.239, grad_norm=93.650, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.051e-04, train_time=3.137 +[gpub015:0/64] 2023-07-05 01:34:36,282 (trainer:732) INFO: 13epoch:train:3201-3300batch: iter_time=1.020e-04, forward_time=0.143, loss_ctc=71.885, loss_att=54.805, acc=0.700, loss=59.929, backward_time=1.241, grad_norm=90.742, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.051e-04, train_time=3.142 +[gpub015:0/64] 2023-07-05 01:35:27,815 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub015:0/64] 2023-07-05 01:35:46,394 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 01:35:49,849 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 01:35:49,849 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub015:0/64] 2023-07-05 01:35:49,855 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 01:42:13,884 (trainer:732) INFO: 13epoch:train:3301-3400batch: iter_time=1.232, forward_time=0.144, loss_ctc=75.439, loss_att=63.828, acc=0.678, loss=67.311, backward_time=1.257, grad_norm=94.817, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.050e-04, train_time=9.152 +[gpub015:0/64] 2023-07-05 01:44:51,700 (trainer:732) INFO: 13epoch:train:3401-3500batch: iter_time=1.174e-04, forward_time=0.145, loss_ctc=74.146, loss_att=54.359, acc=0.677, loss=60.295, backward_time=1.243, grad_norm=122.599, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.050e-04, train_time=3.156 +[gpub015:0/64] 2023-07-05 01:47:28,958 (trainer:732) INFO: 13epoch:train:3501-3600batch: iter_time=1.050e-04, forward_time=0.145, loss_ctc=73.295, loss_att=55.148, acc=0.682, loss=60.592, backward_time=1.243, grad_norm=88.590, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.049e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 01:50:06,157 (trainer:732) INFO: 13epoch:train:3601-3700batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=83.897, loss_att=65.933, acc=0.670, loss=71.322, backward_time=1.242, grad_norm=103.744, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.049e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 01:52:43,823 (trainer:732) INFO: 13epoch:train:3701-3800batch: iter_time=1.084e-04, forward_time=0.144, loss_ctc=83.779, loss_att=69.293, acc=0.660, loss=73.639, backward_time=1.243, grad_norm=108.364, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.048e-04, train_time=3.153 +[gpub015:0/64] 2023-07-05 01:55:26,107 (trainer:732) INFO: 13epoch:train:3801-3900batch: iter_time=1.207e-04, forward_time=0.144, 
loss_ctc=82.365, loss_att=67.557, acc=0.668, loss=71.999, backward_time=1.247, grad_norm=101.347, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.048e-04, train_time=3.245 +[gpub015:0/64] 2023-07-05 01:58:04,045 (trainer:732) INFO: 13epoch:train:3901-4000batch: iter_time=1.078e-04, forward_time=0.144, loss_ctc=74.649, loss_att=55.654, acc=0.670, loss=61.352, backward_time=1.243, grad_norm=98.467, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.047e-04, train_time=3.159 +[gpub015:0/64] 2023-07-05 02:00:41,402 (trainer:732) INFO: 13epoch:train:4001-4100batch: iter_time=1.069e-04, forward_time=0.146, loss_ctc=73.796, loss_att=60.178, acc=0.683, loss=64.263, backward_time=1.243, grad_norm=95.938, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.047e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 02:02:24,654 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub015:0/64] 2023-07-05 02:02:42,647 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 02:02:46,089 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 02:02:46,089 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub015:0/64] 2023-07-05 02:02:46,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 02:07:52,811 (trainer:732) INFO: 13epoch:train:4101-4200batch: iter_time=1.202, forward_time=0.144, loss_ctc=74.415, loss_att=55.435, acc=0.692, loss=61.129, backward_time=1.254, grad_norm=107.073, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.046e-04, train_time=8.628 +[gpub015:0/64] 2023-07-05 02:10:30,964 (trainer:732) INFO: 13epoch:train:4201-4300batch: iter_time=1.046e-04, forward_time=0.144, loss_ctc=72.424, loss_att=58.787, acc=0.670, loss=62.878, backward_time=1.242, grad_norm=122.387, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.046e-04, train_time=3.163 +[gpub015:0/64] 2023-07-05 02:13:08,002 (trainer:732) INFO: 13epoch:train:4301-4400batch: iter_time=1.043e-04, forward_time=0.144, loss_ctc=75.996, loss_att=56.412, acc=0.673, loss=62.287, backward_time=1.240, grad_norm=92.333, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.046e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 02:15:45,069 (trainer:732) INFO: 13epoch:train:4401-4500batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=77.586, loss_att=60.651, acc=0.678, loss=65.732, backward_time=1.241, grad_norm=94.112, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.045e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 02:18:22,044 (trainer:732) INFO: 13epoch:train:4501-4600batch: iter_time=1.049e-04, forward_time=0.144, loss_ctc=82.242, loss_att=64.662, acc=0.670, loss=69.936, backward_time=1.240, grad_norm=115.961, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.045e-04, train_time=3.139 
+[gpub015:0/64] 2023-07-05 02:20:59,514 (trainer:732) INFO: 13epoch:train:4601-4700batch: iter_time=1.072e-04, forward_time=0.145, loss_ctc=92.408, loss_att=76.645, acc=0.664, loss=81.374, backward_time=1.244, grad_norm=106.089, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.044e-04, train_time=3.149 +[gpub015:0/64] 2023-07-05 02:23:36,368 (trainer:732) INFO: 13epoch:train:4701-4800batch: iter_time=1.045e-04, forward_time=0.144, loss_ctc=69.859, loss_att=51.347, acc=0.673, loss=56.901, backward_time=1.239, grad_norm=89.948, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.044e-04, train_time=3.137 +[gpub015:0/64] 2023-07-05 02:26:14,585 (trainer:732) INFO: 13epoch:train:4801-4900batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=77.996, loss_att=63.746, acc=0.677, loss=68.021, backward_time=1.244, grad_norm=88.365, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.043e-04, train_time=3.164 +[gpub015:0/64] 2023-07-05 02:28:53,321 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub015:0/64] 2023-07-05 02:29:11,386 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 02:29:15,170 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 02:29:15,170 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub015:0/64] 2023-07-05 02:29:15,176 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 02:32:58,371 (trainer:732) INFO: 13epoch:train:4901-5000batch: iter_time=1.213, forward_time=0.144, loss_ctc=65.774, loss_att=47.844, acc=0.702, loss=53.223, backward_time=1.251, grad_norm=75.759, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.043e-04, train_time=8.075 +[gpub015:0/64] 2023-07-05 02:35:38,624 (trainer:732) INFO: 13epoch:train:5001-5100batch: iter_time=9.429e-05, forward_time=0.147, loss_ctc=76.403, loss_att=64.562, acc=0.673, loss=68.115, backward_time=1.250, grad_norm=106.879, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.042e-04, train_time=3.205 +[gpub015:0/64] 2023-07-05 02:38:15,647 (trainer:732) INFO: 13epoch:train:5101-5200batch: iter_time=9.749e-05, forward_time=0.145, loss_ctc=71.952, loss_att=51.831, acc=0.688, loss=57.867, backward_time=1.241, grad_norm=89.609, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.042e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 02:40:52,928 (trainer:732) INFO: 13epoch:train:5201-5300batch: iter_time=9.622e-05, forward_time=0.145, loss_ctc=76.889, loss_att=62.062, acc=0.679, loss=66.510, backward_time=1.243, grad_norm=98.013, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.041e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 02:43:30,305 (trainer:732) INFO: 13epoch:train:5301-5400batch: iter_time=9.506e-05, forward_time=0.146, loss_ctc=83.260, loss_att=66.659, acc=0.681, loss=71.639, 
backward_time=1.244, grad_norm=97.815, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.041e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 02:46:07,967 (trainer:732) INFO: 13epoch:train:5401-5500batch: iter_time=9.793e-05, forward_time=0.145, loss_ctc=87.108, loss_att=75.168, acc=0.671, loss=78.750, backward_time=1.246, grad_norm=104.971, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.041e-04, train_time=3.153 +[gpub015:0/64] 2023-07-05 02:48:45,133 (trainer:732) INFO: 13epoch:train:5501-5600batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=73.208, loss_att=55.122, acc=0.679, loss=60.548, backward_time=1.241, grad_norm=98.135, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.040e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 02:51:22,309 (trainer:732) INFO: 13epoch:train:5601-5700batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=73.220, loss_att=56.231, acc=0.682, loss=61.328, backward_time=1.242, grad_norm=88.409, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.040e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 02:53:59,660 (trainer:732) INFO: 13epoch:train:5701-5800batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=71.839, loss_att=55.799, acc=0.701, loss=60.611, backward_time=1.242, grad_norm=86.829, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.039e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 02:54:51,451 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub015:0/64] 2023-07-05 02:55:09,616 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 02:55:12,984 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 02:55:12,984 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub015:0/64] 2023-07-05 02:55:12,991 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 03:00:53,451 (trainer:732) INFO: 13epoch:train:5801-5900batch: iter_time=1.204, forward_time=0.145, loss_ctc=74.023, loss_att=61.691, acc=0.691, loss=65.390, backward_time=1.254, grad_norm=93.924, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.039e-04, train_time=8.276 +[gpub015:0/64] 2023-07-05 03:03:31,383 (trainer:732) INFO: 13epoch:train:5901-6000batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=73.747, loss_att=54.734, acc=0.677, loss=60.438, backward_time=1.243, grad_norm=92.066, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.038e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 03:06:08,513 (trainer:732) INFO: 13epoch:train:6001-6100batch: iter_time=1.025e-04, forward_time=0.143, loss_ctc=73.596, loss_att=56.085, acc=0.692, loss=61.338, backward_time=1.242, grad_norm=87.934, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.038e-04, train_time=3.142 +[gpub015:0/64] 2023-07-05 03:08:45,546 (trainer:732) INFO: 
13epoch:train:6101-6200batch: iter_time=9.164e-05, forward_time=0.143, loss_ctc=82.621, loss_att=65.377, acc=0.684, loss=70.550, backward_time=1.242, grad_norm=93.644, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.037e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 03:11:22,948 (trainer:732) INFO: 13epoch:train:6201-6300batch: iter_time=9.824e-05, forward_time=0.144, loss_ctc=83.357, loss_att=69.309, acc=0.670, loss=73.524, backward_time=1.244, grad_norm=95.890, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.037e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 03:14:00,619 (trainer:732) INFO: 13epoch:train:6301-6400batch: iter_time=9.997e-05, forward_time=0.145, loss_ctc=81.690, loss_att=66.899, acc=0.678, loss=71.336, backward_time=1.245, grad_norm=112.957, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.036e-04, train_time=3.153 +[gpub015:0/64] 2023-07-05 03:16:37,757 (trainer:732) INFO: 13epoch:train:6401-6500batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=73.800, loss_att=53.459, acc=0.684, loss=59.561, backward_time=1.242, grad_norm=92.890, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.036e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 03:19:14,828 (trainer:732) INFO: 13epoch:train:6501-6600batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=74.459, loss_att=61.204, acc=0.684, loss=65.181, backward_time=1.241, grad_norm=87.396, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.036e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 03:20:57,945 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub015:0/64] 2023-07-05 03:21:15,656 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 03:21:19,045 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 03:21:19,045 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub015:0/64] 2023-07-05 03:21:19,052 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 03:25:54,443 (trainer:732) INFO: 13epoch:train:6601-6700batch: iter_time=1.218, forward_time=0.144, loss_ctc=73.140, loss_att=52.638, acc=0.707, loss=58.789, backward_time=1.251, grad_norm=94.399, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.035e-04, train_time=7.992 +[gpub015:0/64] 2023-07-05 03:28:44,608 (trainer:732) INFO: 13epoch:train:6701-6800batch: iter_time=1.096e-04, forward_time=0.144, loss_ctc=73.678, loss_att=58.389, acc=0.673, loss=62.975, backward_time=1.260, grad_norm=107.744, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.035e-04, train_time=3.403 +[gpub015:0/64] 2023-07-05 03:31:30,183 (trainer:732) INFO: 13epoch:train:6801-6900batch: iter_time=1.042e-04, forward_time=0.144, loss_ctc=73.706, loss_att=55.405, acc=0.686, loss=60.895, backward_time=1.247, grad_norm=96.906, clip=100.000, loss_scale=3.518e+13, 
optim_step_time=0.181, optim0_lr0=1.034e-04, train_time=3.311 +[gpub015:0/64] 2023-07-05 03:34:08,360 (trainer:732) INFO: 13epoch:train:6901-7000batch: iter_time=1.082e-04, forward_time=0.144, loss_ctc=78.905, loss_att=61.541, acc=0.686, loss=66.750, backward_time=1.242, grad_norm=102.781, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.034e-04, train_time=3.163 +[gpub015:0/64] 2023-07-05 03:36:45,724 (trainer:732) INFO: 13epoch:train:7001-7100batch: iter_time=1.027e-04, forward_time=0.144, loss_ctc=82.124, loss_att=65.694, acc=0.673, loss=70.623, backward_time=1.242, grad_norm=99.630, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.033e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 03:39:23,628 (trainer:732) INFO: 13epoch:train:7101-7200batch: iter_time=1.048e-04, forward_time=0.147, loss_ctc=88.303, loss_att=75.234, acc=0.673, loss=79.155, backward_time=1.246, grad_norm=106.563, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.033e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 03:42:01,838 (trainer:732) INFO: 13epoch:train:7201-7300batch: iter_time=1.028e-04, forward_time=0.144, loss_ctc=68.219, loss_att=50.582, acc=0.684, loss=55.873, backward_time=1.243, grad_norm=81.465, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.033e-04, train_time=3.164 +[gpub015:0/64] 2023-07-05 03:44:39,356 (trainer:732) INFO: 13epoch:train:7301-7400batch: iter_time=1.055e-04, forward_time=0.145, loss_ctc=77.835, loss_att=62.872, acc=0.687, loss=67.360, backward_time=1.244, grad_norm=87.557, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.032e-04, train_time=3.150 +[gpub015:0/64] 2023-07-05 03:47:15,904 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub015:0/64] 2023-07-05 03:47:33,872 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 03:47:37,276 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 03:47:37,276 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub015:0/64] 2023-07-05 03:47:37,282 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 03:50:05,764 (trainer:732) INFO: 13epoch:train:7401-7500batch: iter_time=1.192, forward_time=0.144, loss_ctc=65.662, loss_att=47.694, acc=0.711, loss=53.085, backward_time=1.246, grad_norm=79.520, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.032e-04, train_time=6.528 +[gpub015:0/64] 2023-07-05 03:52:45,276 (trainer:732) INFO: 13epoch:train:7501-7600batch: iter_time=9.605e-05, forward_time=0.145, loss_ctc=70.791, loss_att=58.658, acc=0.672, loss=62.298, backward_time=1.247, grad_norm=107.372, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.031e-04, train_time=3.190 +[gpub015:0/64] 2023-07-05 03:55:23,506 (trainer:732) INFO: 13epoch:train:7601-7700batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=72.470, loss_att=52.026, acc=0.694, loss=58.159, backward_time=1.243, grad_norm=94.745, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.031e-04, train_time=3.164 +[gpub015:0/64] 2023-07-05 03:58:01,078 (trainer:732) INFO: 13epoch:train:7701-7800batch: iter_time=9.728e-05, forward_time=0.144, loss_ctc=80.052, loss_att=60.305, acc=0.687, loss=66.229, backward_time=1.241, grad_norm=113.321, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.030e-04, train_time=3.151 +[gpub015:0/64] 2023-07-05 04:00:38,495 (trainer:732) INFO: 13epoch:train:7801-7900batch: iter_time=9.455e-05, forward_time=0.144, loss_ctc=81.656, loss_att=66.033, acc=0.680, loss=70.720, backward_time=1.242, grad_norm=93.460, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.030e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 04:03:16,017 (trainer:732) INFO: 13epoch:train:7901-8000batch: iter_time=1.008e-04, forward_time=0.143, loss_ctc=85.972, loss_att=74.825, acc=0.670, loss=78.169, backward_time=1.244, grad_norm=112.494, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.029e-04, train_time=3.150 +[gpub015:0/64] 2023-07-05 04:05:53,016 (trainer:732) INFO: 13epoch:train:8001-8100batch: iter_time=1.015e-04, forward_time=0.143, loss_ctc=74.498, loss_att=55.317, acc=0.680, loss=61.071, backward_time=1.240, grad_norm=91.464, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.029e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 04:08:30,443 (trainer:732) INFO: 13epoch:train:8101-8200batch: iter_time=9.791e-05, forward_time=0.144, loss_ctc=69.863, loss_att=54.241, acc=0.685, loss=58.927, backward_time=1.242, grad_norm=82.075, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, 
optim0_lr0=1.029e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 04:11:07,674 (trainer:732) INFO: 13epoch:train:8201-8300batch: iter_time=1.092e-04, forward_time=0.144, loss_ctc=68.595, loss_att=52.123, acc=0.706, loss=57.064, backward_time=1.242, grad_norm=79.546, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.028e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 04:12:00,871 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub015:0/64] 2023-07-05 04:12:18,984 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 04:12:22,440 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 04:12:22,440 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub015:0/64] 2023-07-05 04:12:22,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 04:18:32,238 (trainer:732) INFO: 13epoch:train:8301-8400batch: iter_time=1.223, forward_time=0.146, loss_ctc=72.469, loss_att=59.337, acc=0.686, loss=63.277, backward_time=1.252, grad_norm=96.645, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.028e-04, train_time=8.891 +[gpub015:0/64] 2023-07-05 04:21:10,438 (trainer:732) INFO: 13epoch:train:8401-8500batch: iter_time=1.150e-04, forward_time=0.145, loss_ctc=73.753, loss_att=57.673, acc=0.674, loss=62.497, backward_time=1.243, grad_norm=122.511, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.027e-04, train_time=3.164 +[gpub015:0/64] 2023-07-05 04:23:48,063 (trainer:732) INFO: 13epoch:train:8501-8600batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=73.060, loss_att=55.876, acc=0.686, loss=61.032, backward_time=1.241, grad_norm=95.020, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.027e-04, train_time=3.152 +[gpub015:0/64] 2023-07-05 04:26:25,397 (trainer:732) INFO: 13epoch:train:8601-8700batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=82.342, loss_att=65.676, acc=0.670, loss=70.675, backward_time=1.242, grad_norm=98.947, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.026e-04, train_time=3.146 +[gpub015:0/64] 2023-07-05 04:29:04,670 (trainer:732) INFO: 13epoch:train:8701-8800batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=82.940, loss_att=68.734, acc=0.670, loss=72.996, backward_time=1.243, grad_norm=94.812, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.026e-04, train_time=3.185 +[gpub015:0/64] 2023-07-05 04:31:43,040 (trainer:732) INFO: 13epoch:train:8801-8900batch: iter_time=1.124e-04, forward_time=0.147, loss_ctc=80.299, loss_att=65.836, acc=0.675, loss=70.175, backward_time=1.243, grad_norm=110.899, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.026e-04, train_time=3.167 +[gpub015:0/64] 2023-07-05 04:34:19,985 (trainer:732) INFO: 13epoch:train:8901-9000batch: iter_time=1.233e-04, forward_time=0.145, loss_ctc=74.553, loss_att=55.213, 
acc=0.672, loss=61.015, backward_time=1.240, grad_norm=114.627, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.025e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 04:36:57,034 (trainer:732) INFO: 13epoch:train:9001-9100batch: iter_time=1.199e-04, forward_time=0.145, loss_ctc=72.019, loss_att=59.355, acc=0.687, loss=63.154, backward_time=1.241, grad_norm=90.972, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.025e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 04:38:44,132 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub015:0/64] 2023-07-05 04:39:02,444 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 04:39:06,144 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 04:39:06,144 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub015:0/64] 2023-07-05 04:39:06,150 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 04:43:21,746 (trainer:732) INFO: 13epoch:train:9101-9200batch: iter_time=1.211, forward_time=0.146, loss_ctc=72.956, loss_att=53.606, acc=0.696, loss=59.411, backward_time=1.254, grad_norm=87.995, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.024e-04, train_time=7.694 +[gpub015:0/64] 2023-07-05 04:46:02,195 (trainer:732) INFO: 13epoch:train:9201-9300batch: iter_time=1.155e-04, forward_time=0.144, loss_ctc=71.310, loss_att=57.178, acc=0.677, loss=61.418, backward_time=1.247, grad_norm=95.519, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.024e-04, train_time=3.209 +[gpub015:0/64] 2023-07-05 04:48:40,368 (trainer:732) INFO: 13epoch:train:9301-9400batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=73.757, loss_att=55.866, acc=0.683, loss=61.234, backward_time=1.243, grad_norm=93.753, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.023e-04, train_time=3.163 +[gpub015:0/64] 2023-07-05 04:51:18,276 (trainer:732) INFO: 13epoch:train:9401-9500batch: iter_time=1.172e-04, forward_time=0.144, loss_ctc=76.011, loss_att=59.441, acc=0.685, loss=64.412, backward_time=1.242, grad_norm=89.515, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.023e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 04:53:55,349 (trainer:732) INFO: 13epoch:train:9501-9600batch: iter_time=1.167e-04, forward_time=0.144, loss_ctc=79.471, loss_att=63.716, acc=0.670, loss=68.443, backward_time=1.241, grad_norm=101.257, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.023e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 04:56:33,102 (trainer:732) INFO: 13epoch:train:9601-9700batch: iter_time=9.937e-05, forward_time=0.145, loss_ctc=89.171, loss_att=76.309, acc=0.664, loss=80.167, backward_time=1.245, grad_norm=108.677, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.022e-04, train_time=3.155 +[gpub015:0/64] 2023-07-05 04:59:10,081 
(trainer:732) INFO: 13epoch:train:9701-9800batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=68.036, loss_att=50.680, acc=0.681, loss=55.886, backward_time=1.240, grad_norm=79.168, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.022e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 05:01:47,472 (trainer:732) INFO: 13epoch:train:9801-9900batch: iter_time=1.160e-04, forward_time=0.145, loss_ctc=75.666, loss_att=61.758, acc=0.685, loss=65.930, backward_time=1.245, grad_norm=98.942, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.021e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 05:04:24,479 (trainer:732) INFO: 13epoch:train:9901-10000batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=64.966, loss_att=46.260, acc=0.711, loss=51.872, backward_time=1.241, grad_norm=91.008, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.021e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 05:17:35,091 (trainer:338) INFO: 13epoch results: [train] iter_time=0.157, forward_time=0.145, loss_ctc=77.137, loss_att=60.711, acc=0.679, loss=65.639, backward_time=1.245, grad_norm=100.824, clip=100.000, loss_scale=2.287e+13, optim_step_time=0.181, optim0_lr0=1.043e-04, train_time=3.766, time=5 hours, 14 minutes and 12.9 seconds, total_count=100000, gpu_max_cached_mem_GB=37.139, [valid] loss_ctc=55.311, cer_ctc=0.308, loss_att=44.884, acc=0.629, cer=0.442, wer=0.994, loss=48.012, time=7 minutes and 2.64 seconds, total_count=10626, gpu_max_cached_mem_GB=37.139, [att_plot] time=5 minutes and 45.23 seconds, total_count=0, gpu_max_cached_mem_GB=37.139 +[gpub015:0/64] 2023-07-05 05:17:50,410 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub015:0/64] 2023-07-05 05:17:50,415 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/8epoch.pth +[gpub015:0/64] 2023-07-05 05:17:50,415 (trainer:272) INFO: 14/100epoch started. Estimated time to finish: 2 weeks, 5 days and 11 hours +[gpub015:0/64] 2023-07-05 05:17:50,419 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub015:0/64] 2023-07-05 05:18:08,009 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 05:18:11,352 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 05:18:11,353 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub015:0/64] 2023-07-05 05:18:11,359 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 05:22:38,320 (trainer:732) INFO: 14epoch:train:1-100batch: iter_time=1.236, forward_time=0.146, loss_ctc=67.492, loss_att=49.941, acc=0.685, loss=55.206, backward_time=1.261, grad_norm=90.648, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.020e-04, train_time=5.758 +[gpub015:0/64] 2023-07-05 05:25:16,237 (trainer:732) INFO: 14epoch:train:101-200batch: iter_time=1.042e-04, forward_time=0.145, loss_ctc=76.460, loss_att=60.179, acc=0.660, loss=65.063, backward_time=1.240, grad_norm=93.506, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.020e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 05:27:54,151 (trainer:732) INFO: 14epoch:train:201-300batch: iter_time=1.080e-04, forward_time=0.146, loss_ctc=71.779, loss_att=54.026, acc=0.681, loss=59.352, backward_time=1.241, grad_norm=86.178, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.020e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 05:30:31,984 (trainer:732) INFO: 14epoch:train:301-400batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=74.923, loss_att=54.424, acc=0.671, loss=60.574, backward_time=1.241, grad_norm=97.880, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.019e-04, train_time=3.156 +[gpub015:0/64] 2023-07-05 05:33:17,013 (trainer:732) INFO: 14epoch:train:401-500batch: iter_time=5.366e-04, forward_time=0.209, loss_ctc=74.569, loss_att=59.503, acc=0.676, loss=64.023, backward_time=1.247, grad_norm=94.127, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.184, optim0_lr0=1.019e-04, train_time=3.300 +[gpub015:0/64] 2023-07-05 05:36:02,320 (trainer:732) INFO: 14epoch:train:501-600batch: iter_time=1.064e-04, forward_time=0.200, loss_ctc=68.014, loss_att=54.675, acc=0.665, loss=58.677, backward_time=1.252, grad_norm=82.294, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.184, optim0_lr0=1.018e-04, train_time=3.306 +[gpub015:0/64] 2023-07-05 05:38:53,673 (trainer:732) INFO: 14epoch:train:601-700batch: iter_time=1.074e-04, forward_time=0.162, loss_ctc=79.869, loss_att=65.427, acc=0.663, loss=69.759, backward_time=1.260, grad_norm=100.551, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.183, optim0_lr0=1.018e-04, train_time=3.427 +[gpub015:0/64] 2023-07-05 05:41:42,236 (trainer:732) INFO: 14epoch:train:701-800batch: iter_time=1.092e-04, forward_time=0.146, loss_ctc=87.368, loss_att=57.776, acc=0.685, loss=66.653, backward_time=1.254, grad_norm=128.453, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.017e-04, train_time=3.371 
+[gpub015:0/64] 2023-07-05 05:42:44,919 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub015:0/64] 2023-07-05 05:43:02,321 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 05:43:05,674 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 05:43:05,674 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub015:0/64] 2023-07-05 05:43:05,680 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 05:47:29,182 (trainer:732) INFO: 14epoch:train:801-900batch: iter_time=1.271, forward_time=0.146, loss_ctc=80.548, loss_att=57.074, acc=0.681, loss=64.116, backward_time=1.258, grad_norm=96.622, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.017e-04, train_time=6.939 +[gpub015:0/64] 2023-07-05 05:50:06,931 (trainer:732) INFO: 14epoch:train:901-1000batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=78.396, loss_att=65.274, acc=0.669, loss=69.210, backward_time=1.243, grad_norm=98.587, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.017e-04, train_time=3.155 +[gpub015:0/64] 2023-07-05 05:52:44,281 (trainer:732) INFO: 14epoch:train:1001-1100batch: iter_time=1.191e-04, forward_time=0.146, loss_ctc=70.269, loss_att=52.890, acc=0.696, loss=58.104, backward_time=1.243, grad_norm=92.744, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.016e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 05:55:21,561 (trainer:732) INFO: 14epoch:train:1101-1200batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=71.987, loss_att=52.009, acc=0.680, loss=58.002, backward_time=1.242, grad_norm=90.603, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.016e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 05:57:58,721 (trainer:732) INFO: 14epoch:train:1201-1300batch: iter_time=1.267e-04, forward_time=0.145, loss_ctc=73.245, loss_att=58.533, acc=0.680, loss=62.946, backward_time=1.242, grad_norm=93.277, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.015e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 06:00:35,732 (trainer:732) INFO: 14epoch:train:1301-1400batch: iter_time=1.379e-04, forward_time=0.144, loss_ctc=69.153, loss_att=56.204, acc=0.675, loss=60.089, backward_time=1.242, grad_norm=87.236, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.015e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 06:03:13,091 (trainer:732) INFO: 14epoch:train:1401-1500batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=75.620, loss_att=62.912, acc=0.676, loss=66.725, backward_time=1.242, grad_norm=122.064, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.014e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 06:05:50,085 (trainer:732) INFO: 14epoch:train:1501-1600batch: iter_time=1.158e-04, forward_time=0.144, loss_ctc=86.422, loss_att=59.210, acc=0.686, loss=67.374, backward_time=1.240, 
grad_norm=121.982, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.014e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 06:07:45,814 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub015:0/64] 2023-07-05 06:08:04,021 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 06:08:07,429 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 06:08:07,429 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub015:0/64] 2023-07-05 06:08:07,435 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 06:11:42,622 (trainer:732) INFO: 14epoch:train:1601-1700batch: iter_time=1.516, forward_time=0.145, loss_ctc=91.881, loss_att=63.528, acc=0.682, loss=72.034, backward_time=1.250, grad_norm=122.952, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.014e-04, train_time=7.051 +[gpub015:0/64] 2023-07-05 06:14:20,460 (trainer:732) INFO: 14epoch:train:1701-1800batch: iter_time=1.177e-04, forward_time=0.145, loss_ctc=65.864, loss_att=52.353, acc=0.669, loss=56.407, backward_time=1.243, grad_norm=85.260, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.013e-04, train_time=3.157 +[gpub015:0/64] 2023-07-05 06:16:57,381 (trainer:732) INFO: 14epoch:train:1801-1900batch: iter_time=1.220e-04, forward_time=0.143, loss_ctc=80.381, loss_att=62.098, acc=0.676, loss=67.583, backward_time=1.240, grad_norm=109.302, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.013e-04, train_time=3.138 +[gpub015:0/64] 2023-07-05 06:19:34,703 (trainer:732) INFO: 14epoch:train:1901-2000batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=65.835, loss_att=46.913, acc=0.695, loss=52.589, backward_time=1.240, grad_norm=77.097, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.012e-04, train_time=3.146 +[gpub015:0/64] 2023-07-05 06:22:11,686 (trainer:732) INFO: 14epoch:train:2001-2100batch: iter_time=1.216e-04, forward_time=0.144, loss_ctc=74.781, loss_att=55.605, acc=0.680, loss=61.358, backward_time=1.240, grad_norm=98.143, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.012e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 06:24:48,909 (trainer:732) INFO: 14epoch:train:2101-2200batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=70.364, loss_att=58.732, acc=0.665, loss=62.221, backward_time=1.243, grad_norm=96.995, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.012e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 06:27:26,091 (trainer:732) INFO: 14epoch:train:2201-2300batch: iter_time=1.191e-04, forward_time=0.145, loss_ctc=71.398, loss_att=61.066, acc=0.665, loss=64.165, backward_time=1.242, grad_norm=86.951, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.011e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 06:30:03,134 (trainer:732) INFO: 
14epoch:train:2301-2400batch: iter_time=1.059e-04, forward_time=0.145, loss_ctc=79.594, loss_att=55.367, acc=0.691, loss=62.635, backward_time=1.241, grad_norm=111.641, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.011e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 06:32:40,642 (trainer:732) INFO: 14epoch:train:2401-2500batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=91.170, loss_att=66.737, acc=0.666, loss=74.067, backward_time=1.244, grad_norm=122.083, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.010e-04, train_time=3.150 +[gpub015:0/64] 2023-07-05 06:32:43,499 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub015:0/64] 2023-07-05 06:33:01,325 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 06:33:04,751 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 06:33:04,751 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub015:0/64] 2023-07-05 06:33:04,758 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 06:38:39,312 (trainer:732) INFO: 14epoch:train:2501-2600batch: iter_time=1.217, forward_time=0.147, loss_ctc=66.054, loss_att=47.825, acc=0.704, loss=53.294, backward_time=1.250, grad_norm=90.048, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.010e-04, train_time=7.173 +[gpub015:0/64] 2023-07-05 06:41:16,644 (trainer:732) INFO: 14epoch:train:2601-2700batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=74.725, loss_att=59.961, acc=0.674, loss=64.390, backward_time=1.242, grad_norm=98.791, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.010e-04, train_time=3.146 +[gpub015:0/64] 2023-07-05 06:43:53,864 (trainer:732) INFO: 14epoch:train:2701-2800batch: iter_time=1.041e-04, forward_time=0.145, loss_ctc=71.636, loss_att=53.864, acc=0.695, loss=59.196, backward_time=1.242, grad_norm=88.306, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.009e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 06:46:30,941 (trainer:732) INFO: 14epoch:train:2801-2900batch: iter_time=1.197e-04, forward_time=0.146, loss_ctc=72.656, loss_att=52.692, acc=0.678, loss=58.681, backward_time=1.241, grad_norm=93.367, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.009e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 06:49:08,213 (trainer:732) INFO: 14epoch:train:2901-3000batch: iter_time=1.044e-04, forward_time=0.144, loss_ctc=72.828, loss_att=58.523, acc=0.687, loss=62.815, backward_time=1.242, grad_norm=92.957, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.008e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 06:51:45,294 (trainer:732) INFO: 14epoch:train:3001-3100batch: iter_time=1.069e-04, forward_time=0.143, loss_ctc=68.485, loss_att=54.235, acc=0.679, loss=58.510, backward_time=1.241, grad_norm=90.151, clip=100.000, loss_scale=7.037e+13, 
optim_step_time=0.181, optim0_lr0=1.008e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 06:54:28,653 (trainer:732) INFO: 14epoch:train:3101-3200batch: iter_time=9.495e-05, forward_time=0.144, loss_ctc=79.365, loss_att=65.692, acc=0.672, loss=69.794, backward_time=1.247, grad_norm=93.123, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.007e-04, train_time=3.267 +[gpub015:0/64] 2023-07-05 06:57:07,175 (trainer:732) INFO: 14epoch:train:3201-3300batch: iter_time=9.992e-05, forward_time=0.144, loss_ctc=85.733, loss_att=57.493, acc=0.694, loss=65.965, backward_time=1.243, grad_norm=134.602, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.007e-04, train_time=3.170 +[gpub015:0/64] 2023-07-05 06:58:02,021 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub015:0/64] 2023-07-05 06:58:20,249 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 06:58:23,672 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 06:58:23,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub015:0/64] 2023-07-05 06:58:23,679 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 07:02:56,111 (trainer:732) INFO: 14epoch:train:3301-3400batch: iter_time=1.206, forward_time=0.206, loss_ctc=75.066, loss_att=55.125, acc=0.684, loss=61.108, backward_time=1.254, grad_norm=97.874, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.187, optim0_lr0=1.007e-04, train_time=6.978 +[gpub015:0/64] 2023-07-05 07:05:33,511 (trainer:732) INFO: 14epoch:train:3401-3500batch: iter_time=1.175e-04, forward_time=0.147, loss_ctc=80.268, loss_att=64.486, acc=0.664, loss=69.220, backward_time=1.243, grad_norm=102.801, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.006e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 07:08:10,474 (trainer:732) INFO: 14epoch:train:3501-3600batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=70.086, loss_att=52.785, acc=0.689, loss=57.975, backward_time=1.240, grad_norm=91.850, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.006e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 07:10:47,426 (trainer:732) INFO: 14epoch:train:3601-3700batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=68.709, loss_att=48.855, acc=0.690, loss=54.811, backward_time=1.241, grad_norm=84.286, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.005e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 07:13:25,490 (trainer:732) INFO: 14epoch:train:3701-3800batch: iter_time=1.245e-04, forward_time=0.146, loss_ctc=71.880, loss_att=56.870, acc=0.684, loss=61.373, backward_time=1.243, grad_norm=101.212, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.005e-04, train_time=3.161 +[gpub015:0/64] 2023-07-05 07:16:02,505 (trainer:732) INFO: 14epoch:train:3801-3900batch: iter_time=1.332e-04, forward_time=0.146, 
loss_ctc=65.690, loss_att=53.660, acc=0.671, loss=57.269, backward_time=1.240, grad_norm=103.338, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.005e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 07:18:39,723 (trainer:732) INFO: 14epoch:train:3901-4000batch: iter_time=1.226e-04, forward_time=0.147, loss_ctc=75.033, loss_att=62.084, acc=0.672, loss=65.969, backward_time=1.243, grad_norm=101.234, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.004e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 07:21:16,597 (trainer:732) INFO: 14epoch:train:4001-4100batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=83.330, loss_att=57.692, acc=0.689, loss=65.383, backward_time=1.241, grad_norm=106.096, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.182, optim0_lr0=1.004e-04, train_time=3.137 +[gpub015:0/64] 2023-07-05 07:23:05,096 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub015:0/64] 2023-07-05 07:23:23,163 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 07:23:26,571 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 07:23:26,571 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub015:0/64] 2023-07-05 07:23:26,577 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 07:27:52,257 (trainer:732) INFO: 14epoch:train:4101-4200batch: iter_time=1.222, forward_time=0.146, loss_ctc=87.297, loss_att=61.477, acc=0.683, loss=69.223, backward_time=1.252, grad_norm=113.635, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.182, optim0_lr0=1.003e-04, train_time=7.913 +[gpub015:0/64] 2023-07-05 07:30:30,045 (trainer:732) INFO: 14epoch:train:4201-4300batch: iter_time=9.853e-05, forward_time=0.145, loss_ctc=67.259, loss_att=52.733, acc=0.675, loss=57.091, backward_time=1.241, grad_norm=99.259, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.003e-04, train_time=3.156 +[gpub015:0/64] 2023-07-05 07:33:07,210 (trainer:732) INFO: 14epoch:train:4301-4400batch: iter_time=1.197e-04, forward_time=0.144, loss_ctc=75.113, loss_att=56.926, acc=0.690, loss=62.382, backward_time=1.242, grad_norm=106.822, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.003e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 07:35:43,948 (trainer:732) INFO: 14epoch:train:4401-4500batch: iter_time=1.184e-04, forward_time=0.143, loss_ctc=68.567, loss_att=50.621, acc=0.682, loss=56.005, backward_time=1.240, grad_norm=83.577, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.002e-04, train_time=3.135 +[gpub015:0/64] 2023-07-05 07:38:30,505 (trainer:732) INFO: 14epoch:train:4501-4600batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=71.674, loss_att=50.888, acc=0.691, loss=57.124, backward_time=1.253, grad_norm=85.775, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.002e-04, train_time=3.331 
+[gpub015:0/64] 2023-07-05 07:41:07,397 (trainer:732) INFO: 14epoch:train:4601-4700batch: iter_time=1.265e-04, forward_time=0.143, loss_ctc=73.227, loss_att=60.909, acc=0.677, loss=64.604, backward_time=1.241, grad_norm=95.135, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.001e-04, train_time=3.138 +[gpub015:0/64] 2023-07-05 07:43:46,708 (trainer:732) INFO: 14epoch:train:4701-4800batch: iter_time=1.181e-04, forward_time=0.145, loss_ctc=68.192, loss_att=55.093, acc=0.669, loss=59.023, backward_time=1.241, grad_norm=88.359, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.001e-04, train_time=3.186 +[gpub015:0/64] 2023-07-05 07:46:23,653 (trainer:732) INFO: 14epoch:train:4801-4900batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=79.220, loss_att=59.755, acc=0.682, loss=65.594, backward_time=1.241, grad_norm=120.294, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.001e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 07:49:07,600 (trainer:732) INFO: 14epoch:train:4901-5000batch: iter_time=1.119e-04, forward_time=0.145, loss_ctc=86.729, loss_att=63.418, acc=0.679, loss=70.412, backward_time=1.251, grad_norm=111.675, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.000e-04, train_time=3.279 +[gpub015:0/64] 2023-07-05 07:49:10,466 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub015:0/64] 2023-07-05 07:49:28,643 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 07:49:32,109 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 07:49:32,109 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub015:0/64] 2023-07-05 07:49:32,115 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 07:54:27,305 (trainer:732) INFO: 14epoch:train:5001-5100batch: iter_time=1.182, forward_time=0.145, loss_ctc=65.866, loss_att=47.473, acc=0.709, loss=52.991, backward_time=1.255, grad_norm=80.499, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.998e-05, train_time=6.394 +[gpub015:0/64] 2023-07-05 07:57:05,479 (trainer:732) INFO: 14epoch:train:5101-5200batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=72.265, loss_att=58.470, acc=0.677, loss=62.608, backward_time=1.242, grad_norm=95.899, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.994e-05, train_time=3.163 +[gpub015:0/64] 2023-07-05 07:59:42,843 (trainer:732) INFO: 14epoch:train:5201-5300batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=68.799, loss_att=51.505, acc=0.703, loss=56.693, backward_time=1.242, grad_norm=91.972, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.990e-05, train_time=3.147 +[gpub015:0/64] 2023-07-05 08:02:19,845 (trainer:732) INFO: 14epoch:train:5301-5400batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=71.880, loss_att=51.948, acc=0.684, loss=57.928, 
backward_time=1.240, grad_norm=84.364, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.986e-05, train_time=3.140 +[gpub015:0/64] 2023-07-05 08:04:57,349 (trainer:732) INFO: 14epoch:train:5401-5500batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=71.747, loss_att=58.949, acc=0.688, loss=62.788, backward_time=1.241, grad_norm=95.166, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.982e-05, train_time=3.150 +[gpub015:0/64] 2023-07-05 08:07:36,385 (trainer:732) INFO: 14epoch:train:5501-5600batch: iter_time=1.111e-04, forward_time=0.146, loss_ctc=66.692, loss_att=52.899, acc=0.683, loss=57.037, backward_time=1.241, grad_norm=86.102, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.978e-05, train_time=3.181 +[gpub015:0/64] 2023-07-05 08:10:15,268 (trainer:732) INFO: 14epoch:train:5601-5700batch: iter_time=1.079e-04, forward_time=0.146, loss_ctc=76.339, loss_att=65.761, acc=0.678, loss=68.934, backward_time=1.244, grad_norm=93.247, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.974e-05, train_time=3.177 +[gpub015:0/64] 2023-07-05 08:12:52,081 (trainer:732) INFO: 14epoch:train:5701-5800batch: iter_time=1.003e-04, forward_time=0.144, loss_ctc=82.139, loss_att=57.379, acc=0.692, loss=64.807, backward_time=1.240, grad_norm=128.174, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.970e-05, train_time=3.136 +[gpub015:0/64] 2023-07-05 08:13:46,941 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub015:0/64] 2023-07-05 08:14:04,844 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 08:14:08,310 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 08:14:08,310 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub015:0/64] 2023-07-05 08:14:08,316 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 08:19:19,699 (trainer:732) INFO: 14epoch:train:5801-5900batch: iter_time=1.217, forward_time=0.145, loss_ctc=71.224, loss_att=52.241, acc=0.699, loss=57.936, backward_time=1.247, grad_norm=98.314, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.966e-05, train_time=7.752 +[gpub015:0/64] 2023-07-05 08:21:59,636 (trainer:732) INFO: 14epoch:train:5901-6000batch: iter_time=1.174e-04, forward_time=0.145, loss_ctc=74.291, loss_att=61.484, acc=0.686, loss=65.326, backward_time=1.251, grad_norm=92.289, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.962e-05, train_time=3.199 +[gpub015:0/64] 2023-07-05 08:24:37,095 (trainer:732) INFO: 14epoch:train:6001-6100batch: iter_time=1.225e-04, forward_time=0.145, loss_ctc=73.843, loss_att=54.203, acc=0.692, loss=60.095, backward_time=1.241, grad_norm=98.891, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.958e-05, train_time=3.149 +[gpub015:0/64] 2023-07-05 08:27:16,819 (trainer:732) INFO: 
14epoch:train:6101-6200batch: iter_time=1.314e-04, forward_time=0.145, loss_ctc=63.824, loss_att=46.318, acc=0.701, loss=51.570, backward_time=1.242, grad_norm=82.169, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.954e-05, train_time=3.194 +[gpub015:0/64] 2023-07-05 08:29:58,576 (trainer:732) INFO: 14epoch:train:6201-6300batch: iter_time=1.331e-04, forward_time=0.146, loss_ctc=76.866, loss_att=57.439, acc=0.689, loss=63.267, backward_time=1.252, grad_norm=97.819, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.950e-05, train_time=3.235 +[gpub015:0/64] 2023-07-05 08:32:35,761 (trainer:732) INFO: 14epoch:train:6301-6400batch: iter_time=1.293e-04, forward_time=0.144, loss_ctc=66.634, loss_att=56.034, acc=0.687, loss=59.214, backward_time=1.241, grad_norm=102.060, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.946e-05, train_time=3.143 +[gpub015:0/64] 2023-07-05 08:35:12,784 (trainer:732) INFO: 14epoch:train:6401-6500batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=72.410, loss_att=59.662, acc=0.683, loss=63.486, backward_time=1.242, grad_norm=99.566, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.942e-05, train_time=3.140 +[gpub015:0/64] 2023-07-05 08:37:49,786 (trainer:732) INFO: 14epoch:train:6501-6600batch: iter_time=1.133e-04, forward_time=0.144, loss_ctc=83.069, loss_att=58.236, acc=0.699, loss=65.686, backward_time=1.242, grad_norm=114.626, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.938e-05, train_time=3.140 +[gpub015:0/64] 2023-07-05 08:39:39,785 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub015:0/64] 2023-07-05 08:39:57,765 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 08:40:01,165 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 08:40:01,165 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub015:0/64] 2023-07-05 08:40:01,171 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 08:45:39,161 (trainer:732) INFO: 14epoch:train:6601-6700batch: iter_time=1.302, forward_time=0.147, loss_ctc=81.601, loss_att=58.591, acc=0.689, loss=65.494, backward_time=1.254, grad_norm=111.016, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.182, optim0_lr0=9.935e-05, train_time=9.386 +[gpub015:0/64] 2023-07-05 08:48:20,753 (trainer:732) INFO: 14epoch:train:6701-6800batch: iter_time=1.204e-04, forward_time=0.152, loss_ctc=64.824, loss_att=53.322, acc=0.679, loss=56.773, backward_time=1.251, grad_norm=91.480, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.931e-05, train_time=3.233 +[gpub015:0/64] 2023-07-05 08:50:57,965 (trainer:732) INFO: 14epoch:train:6801-6900batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=74.189, loss_att=56.752, acc=0.696, loss=61.983, backward_time=1.242, grad_norm=88.689, clip=100.000, loss_scale=1.407e+14, 
optim_step_time=0.181, optim0_lr0=9.927e-05, train_time=3.144 +[gpub015:0/64] 2023-07-05 08:53:34,859 (trainer:732) INFO: 14epoch:train:6901-7000batch: iter_time=1.250e-04, forward_time=0.144, loss_ctc=67.349, loss_att=48.904, acc=0.691, loss=54.437, backward_time=1.240, grad_norm=96.258, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.923e-05, train_time=3.138 +[gpub015:0/64] 2023-07-05 08:56:11,811 (trainer:732) INFO: 14epoch:train:7001-7100batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=70.955, loss_att=50.384, acc=0.695, loss=56.555, backward_time=1.239, grad_norm=99.455, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.919e-05, train_time=3.139 +[gpub015:0/64] 2023-07-05 08:58:48,943 (trainer:732) INFO: 14epoch:train:7101-7200batch: iter_time=1.220e-04, forward_time=0.143, loss_ctc=73.206, loss_att=61.616, acc=0.676, loss=65.093, backward_time=1.241, grad_norm=86.657, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.915e-05, train_time=3.142 +[gpub015:0/64] 2023-07-05 09:01:26,106 (trainer:732) INFO: 14epoch:train:7201-7300batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=66.192, loss_att=55.575, acc=0.671, loss=58.760, backward_time=1.241, grad_norm=92.181, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.911e-05, train_time=3.143 +[gpub015:0/64] 2023-07-05 09:04:03,534 (trainer:732) INFO: 14epoch:train:7301-7400batch: iter_time=1.310e-04, forward_time=0.145, loss_ctc=79.538, loss_att=59.235, acc=0.689, loss=65.326, backward_time=1.242, grad_norm=100.524, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.907e-05, train_time=3.148 +[gpub015:0/64] 2023-07-05 09:06:42,404 (trainer:732) INFO: 14epoch:train:7401-7500batch: iter_time=1.288e-04, forward_time=0.144, loss_ctc=86.486, loss_att=62.275, acc=0.679, loss=69.538, backward_time=1.242, grad_norm=101.606, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.903e-05, train_time=3.177 +[gpub015:0/64] 2023-07-05 09:06:48,753 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub015:0/64] 2023-07-05 09:07:06,613 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 09:07:10,023 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 09:07:10,023 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub015:0/64] 2023-07-05 09:07:10,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 09:14:08,695 (trainer:732) INFO: 14epoch:train:7501-7600batch: iter_time=1.646, forward_time=0.177, loss_ctc=69.696, loss_att=50.684, acc=0.697, loss=56.387, backward_time=1.258, grad_norm=83.667, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.183, optim0_lr0=9.899e-05, train_time=8.925 +[gpub015:0/64] 2023-07-05 09:16:46,222 (trainer:732) INFO: 14epoch:train:7601-7700batch: iter_time=9.515e-05, forward_time=0.144, loss_ctc=69.322, loss_att=56.992, acc=0.685, loss=60.691, backward_time=1.242, grad_norm=99.714, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.896e-05, train_time=3.151 +[gpub015:0/64] 2023-07-05 09:19:23,334 (trainer:732) INFO: 14epoch:train:7701-7800batch: iter_time=1.117e-04, forward_time=0.144, loss_ctc=71.131, loss_att=52.075, acc=0.696, loss=57.792, backward_time=1.242, grad_norm=86.667, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.892e-05, train_time=3.142 +[gpub015:0/64] 2023-07-05 09:22:00,431 (trainer:732) INFO: 14epoch:train:7801-7900batch: iter_time=1.339e-04, forward_time=0.144, loss_ctc=73.686, loss_att=51.618, acc=0.692, loss=58.238, backward_time=1.241, grad_norm=96.390, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.888e-05, train_time=3.142 +[gpub015:0/64] 2023-07-05 09:24:37,992 (trainer:732) INFO: 14epoch:train:7901-8000batch: iter_time=1.229e-04, forward_time=0.145, loss_ctc=70.794, loss_att=61.014, acc=0.684, loss=63.948, backward_time=1.244, grad_norm=96.805, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.884e-05, train_time=3.151 +[gpub015:0/64] 2023-07-05 09:27:15,174 (trainer:732) INFO: 14epoch:train:8001-8100batch: iter_time=1.156e-04, forward_time=0.144, loss_ctc=67.169, loss_att=55.923, acc=0.676, loss=59.297, backward_time=1.241, grad_norm=91.698, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.880e-05, train_time=3.143 +[gpub015:0/64] 2023-07-05 09:29:52,590 (trainer:732) INFO: 14epoch:train:8101-8200batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=78.525, loss_att=62.256, acc=0.685, loss=67.136, backward_time=1.243, grad_norm=99.114, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.876e-05, train_time=3.148 +[gpub015:0/64] 2023-07-05 09:32:30,203 (trainer:732) INFO: 14epoch:train:8201-8300batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=81.362, loss_att=55.922, acc=0.694, loss=63.554, backward_time=1.242, grad_norm=95.643, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, 
optim0_lr0=9.872e-05, train_time=3.152 +[gpub015:0/64] 2023-07-05 09:33:34,938 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub015:0/64] 2023-07-05 09:33:53,041 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 09:33:56,452 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 09:33:56,452 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub015:0/64] 2023-07-05 09:33:56,459 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 09:39:03,625 (trainer:732) INFO: 14epoch:train:8301-8400batch: iter_time=1.854, forward_time=0.165, loss_ctc=73.073, loss_att=53.289, acc=0.697, loss=59.224, backward_time=1.253, grad_norm=90.126, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.182, optim0_lr0=9.869e-05, train_time=7.868 +[gpub015:0/64] 2023-07-05 09:41:49,065 (trainer:732) INFO: 14epoch:train:8401-8500batch: iter_time=1.238e-04, forward_time=0.143, loss_ctc=74.007, loss_att=61.877, acc=0.673, loss=65.516, backward_time=1.249, grad_norm=104.208, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.865e-05, train_time=3.309 +[gpub015:0/64] 2023-07-05 09:44:26,311 (trainer:732) INFO: 14epoch:train:8501-8600batch: iter_time=1.224e-04, forward_time=0.144, loss_ctc=72.420, loss_att=54.139, acc=0.689, loss=59.623, backward_time=1.241, grad_norm=86.207, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.861e-05, train_time=3.145 +[gpub015:0/64] 2023-07-05 09:47:03,231 (trainer:732) INFO: 14epoch:train:8601-8700batch: iter_time=1.255e-04, forward_time=0.144, loss_ctc=63.298, loss_att=45.849, acc=0.702, loss=51.084, backward_time=1.241, grad_norm=78.605, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.857e-05, train_time=3.138 +[gpub015:0/64] 2023-07-05 09:49:40,176 (trainer:732) INFO: 14epoch:train:8701-8800batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=75.575, loss_att=55.981, acc=0.687, loss=61.859, backward_time=1.241, grad_norm=95.599, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.853e-05, train_time=3.139 +[gpub015:0/64] 2023-07-05 09:52:37,889 (trainer:732) INFO: 14epoch:train:8801-8900batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=65.438, loss_att=56.468, acc=0.674, loss=59.159, backward_time=1.391, grad_norm=78.677, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.849e-05, train_time=3.554 +[gpub015:0/64] 2023-07-05 09:55:59,096 (trainer:732) INFO: 14epoch:train:8901-9000batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=70.899, loss_att=58.105, acc=0.677, loss=61.943, backward_time=1.561, grad_norm=86.385, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.846e-05, train_time=4.024 +[gpub015:0/64] 2023-07-05 09:59:18,624 (trainer:732) INFO: 14epoch:train:9001-9100batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=81.516, loss_att=57.092, 
acc=0.700, loss=64.419, backward_time=1.548, grad_norm=122.948, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.842e-05, train_time=3.990 +[gpub015:0/64] 2023-07-05 10:01:35,903 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub015:0/64] 2023-07-05 10:01:53,829 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 10:01:57,524 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 10:01:57,525 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub015:0/64] 2023-07-05 10:01:57,531 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML")
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1534, in all_reduce + work = default_pg.allreduce([tensor], opts) +RuntimeError: CUDA error: unknown error +CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +gpub078:4170392:4170392 [3] NCCL INFO comm 0x4f67f390 rank 47 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 6] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 5] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 13] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 12] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 15] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 14] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 4] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 22] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 21] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 20] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 23] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub037:1522725:1522810 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub037:1522724:1522808 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub032:3289604:3289696 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub032:3289605:3289693 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub032:3289606:3289626 [0] NCCL INFO comm 0x501cec20 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub026:2433085:2433171 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub026:2433084:2433172 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub026:2433086:2433173 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub037:1522724:1522745 [0] NCCL INFO comm 0xab8ed350 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub032:3289604:3289627 [0] NCCL INFO comm 0x50c34690 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub032:3289605:3289624 [0] NCCL INFO comm 0xb6f8bc90 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub026:2433085:2433106 [0] NCCL INFO comm 0xb7dab990 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub037:1522725:1522747 [0] NCCL INFO comm 0x4f7df910 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub026:2433084:2433108 [0] NCCL INFO comm 0x4fe36690 rank 4 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub026:2433086:2433109 [0] NCCL INFO comm 0xc27df910 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub037:1522723:1522746 [0] NCCL INFO comm 0xba5d23a0 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub037:1522722:1522811 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub037:1522722:1522744 [0] NCCL INFO comm 0x514cae40 rank 20 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 39] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 38] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub052:1901670:1901757 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +[W ProcessGroupNCCL.cpp:948] [Rank 36] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 37] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub052:1901670:1901690 [0] NCCL INFO comm 0xb6ced700 rank 39 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub052:1901669:1901759 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub052:1901668:1901758 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub052:1901668:1901689 [0] NCCL INFO comm 0x50134230 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub052:1901669:1901688 [0] NCCL INFO comm 0x50c05250 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub052:1901667:1901760 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub032:3289603:3289625 [0] NCCL INFO comm 0x9f95b40 rank 12 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub052:1901667:1901691 [0] NCCL INFO comm 0xbc2124a0 rank 36 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub080:4113203:4113295 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub080:4113203:4113203 [0] NCCL INFO comm 0xa21d7f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub015:879783:879861 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub015:879783:879783 [3] NCCL INFO comm 0x5071eb50 rank 3 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +Process SpawnProcess-3: +gpub079:2657933:2658017 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 14] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 13] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub079:2657933:2657933 [1] NCCL INFO comm 0x8f776d0 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-1: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 36] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 6] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 15] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 23] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 12] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 5] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 51] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 50] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub079:2657934:2658014 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub079:2657935:2658016 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +[W ProcessGroupNCCL.cpp:948] [Rank 48] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 28] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 4] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 29] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +gpub079:2657935:2657956 [0] NCCL INFO comm 0x4edd83d0 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 21] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 31] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 20] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 22] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub050:1879228:1879313 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub050:1879226:1879312 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +[W ProcessGroupNCCL.cpp:948] [Rank 30] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub050:1879227:1879310 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 47] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800494 milliseconds before timing out. 
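The traceback above is the root of the cascade in this excerpt: rank 47's all-reduce timed out after the 30-minute NCCL watchdog limit, its aborted communicator was advertised through the NCCLABORTEDCOMM store key, and every other rank then failed at the same call site, trainer.py:516. That line all-reduces a scalar iterator_stop flag so that all 64 ranks leave the epoch together once any rank's iterator is exhausted. Below is a minimal sketch of that stop-flag pattern, not ESPnet's actual trainer code; the gloo backend, loopback address, and per-rank batch counts are placeholders chosen so the sketch runs on a CPU-only machine:

```python
# Minimal sketch (not ESPnet code) of the stop-flag synchronization performed
# at trainer.py:516: every rank all-reduces a scalar "iterator_stop" each step
# so that all ranks leave the epoch together once any rank runs out of batches.
import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def worker(rank: int, world_size: int) -> None:
    dist.init_process_group(
        backend="gloo",  # gloo so the sketch runs without GPUs; the job used nccl
        init_method="tcp://127.0.0.1:29500",
        rank=rank,
        world_size=world_size,
    )
    local_batches = 3 if rank == 0 else 5  # rank 0 exhausts its iterator first
    iterator_stop = torch.zeros(1, dtype=torch.int64)
    for step in range(10):
        if step >= local_batches:
            iterator_stop.fill_(1)
        # Counterpart of: torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
        dist.all_reduce(iterator_stop, op=dist.ReduceOp.SUM)
        if iterator_stop.item() > 0:
            # If one rank never reaches this collective (e.g. after a CUDA
            # error, as for rank 47 above), the others block here until the
            # watchdog timeout fires.
            print(f"rank {rank}: stopping together at step {step}")
            break
    dist.destroy_process_group()


if __name__ == "__main__":
    mp.spawn(worker, args=(2,), nprocs=2)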
+gpub079:2657934:2657955 [0] NCCL INFO comm 0x505ec9b0 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub050:1879226:1879246 [0] NCCL INFO comm 0x50792660 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub050:1879228:1879247 [0] NCCL INFO comm 0x5177dae0 rank 31 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub050:1879227:1879245 [0] NCCL INFO comm 0x50baa200 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 38] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 33] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 32] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 34] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 35] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 39] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 37] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 52] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800348 milliseconds before timing out. +gpub051:2913626:2913716 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub050:1879225:1879311 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub051:2913625:2913713 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub051:2913626:2913649 [0] NCCL INFO comm 0x9e42a10 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub050:1879225:1879248 [0] NCCL INFO comm 0xa81f9440 rank 28 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub051:2913625:2913648 [0] NCCL INFO comm 0xb9b5ccd0 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub079:2657932:2657957 [0] NCCL INFO comm 0x8c890be0 rank 48 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub015:879781:879860 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub015:879781:879781 [1] NCCL INFO comm 0x8d09c1b0 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 3] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800141 milliseconds before timing out. 
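For reference, the Timeout(ms)=1800000 in these WorkNCCL messages is torch.distributed's default process-group timeout of 30 minutes, after which the NCCL watchdog aborts the hung collective. The timeout is configurable when the group is created; a hedged sketch follows (gloo and world_size=1 only so it runs without a cluster, and the 60-minute value is purely illustrative, not a recommendation drawn from this run):

```python
# Sketch only: Timeout(ms)=1800000 above is torch.distributed's default
# process-group timeout (30 minutes). It can be overridden at group creation.
from datetime import timedelta

import torch.distributed as dist


def main() -> None:
    dist.init_process_group(
        backend="gloo",  # placeholder backend; the failing job used nccl
        init_method="tcp://127.0.0.1:29501",
        rank=0,
        world_size=1,
        timeout=timedelta(minutes=60),  # illustrative value, not from this run
    )
    dist.destroy_process_group()


if __name__ == "__main__":
    main()
```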
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 49] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800195 milliseconds before timing out. +gpub051:2913623:2913647 [0] NCCL INFO comm 0x8dc3e980 rank 32 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Process SpawnProcess-4: +Traceback (most recent call last): +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 31] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 35] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 29] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub051:2913624:2913650 [0] NCCL INFO comm 0xbb329750 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 28] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) +gpub081:2742228:2742325 [1] NCCL INFO [Service thread] Connection closed by localRank 1 + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 34] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub081:2742228:2742228 [1] NCCL INFO comm 0xb78a1250 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 48] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 1] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800012 milliseconds before timing out. 
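The SpawnProcess-3/4 tracebacks that follow fail at a different call site than the others: inside DistributedDataParallel.forward(), whose _sync_buffers() path broadcasts module buffers from an authoritative rank before each forward pass, so it is the first collective those ranks hit after the communicator abort. A small sketch of that mechanism under the same hedges as above (single-process gloo group and the module are placeholders; only the broadcast_buffers keyword is the real DDP argument):

```python
# Sketch of the code path in the SpawnProcess-3/4 tracebacks below: with
# broadcast_buffers=True (the DDP default), forward() first broadcasts module
# buffers (here BatchNorm running stats) via _sync_buffers().
import torch
import torch.distributed as dist
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP


def main() -> None:
    dist.init_process_group(
        backend="gloo",
        init_method="tcp://127.0.0.1:29502",
        rank=0,
        world_size=1,
    )
    model = nn.BatchNorm1d(8)  # has running_mean / running_var buffers
    ddp_model = DDP(model, broadcast_buffers=True)  # default, shown explicitly
    out = ddp_model(torch.randn(4, 8))  # buffer broadcast happens in forward()
    print(out.shape)
    dist.destroy_process_group()


if __name__ == "__main__":
    main()
```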
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 30. Original reason for failure was: [Rank 30] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 51. Original reason for failure was: [Rank 51] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 32] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 50. Original reason for failure was: [Rank 50] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub081:2742227:2742227 [0] NCCL INFO comm 0x518b4950 rank 56 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 55] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 54] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 53] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 33] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub080:4113206:4113298 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub080:4113204:4113297 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub080:4113205:4113296 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub080:4113206:4113224 [0] NCCL INFO comm 0x8c72c2a0 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub080:4113205:4113226 [0] NCCL INFO comm 0x50af0e00 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub080:4113204:4113225 [0] NCCL INFO comm 0xb71b4bf0 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 57] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800134 milliseconds before timing out.
+gpub078:4170389:4170389 [0] NCCL INFO comm 0x4f656710 rank 44 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 55] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 56] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800687 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 54] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 44] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800885 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 53] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 61] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 60] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 63] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 62] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub015:879780:879780 [0] NCCL INFO comm 0x51871d20 rank 0 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 24] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 27] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 26] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 25] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub082:1518446:1518535 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub082:1518446:1518467 [0] NCCL INFO comm 0xb6caaae0 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub082:1518447:1518469 [0] NCCL INFO comm 0xb6376a90 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub082:1518448:1518468 [0] NCCL INFO comm 0x8c5b6f90 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub049:4064877:4064950 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub049:4064875:4064949 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub049:4064877:4064897 [0] NCCL INFO comm 0x4f5c00a0 rank 27 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub049:4064875:4064894 [0] NCCL INFO comm 0xa8769be0 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub049:4064876:4064896 [0] NCCL INFO comm 0xb89777d0 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub049:4064874:4064895 [0] NCCL INFO comm 0x500f4c60 rank 24 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 59] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 58] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub081:2742229:2742324 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub081:2742230:2742253 [0] NCCL INFO comm 0xba992be0 rank 59 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub081:2742229:2742252 [0] NCCL INFO comm 0x50f92c00 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 61] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub082:1518445:1518470 [0] NCCL INFO comm 0x519aa9d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 0] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1801161 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 25] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 24] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 27] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 26] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 62. Original reason for failure was: [Rank 62] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 63. Original reason for failure was: [Rank 63] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 60] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 59] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 58] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 7] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 9] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 10] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub031:1921205:1921294 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+[W ProcessGroupNCCL.cpp:948] [Rank 8] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub026:2433087:2433174 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub026:2433087:2433107 [0] NCCL INFO comm 0x50347080 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub031:1921205:1921228 [0] NCCL INFO comm 0x92a3a80 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub031:1921206:1921295 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub031:1921206:1921230 [0] NCCL INFO comm 0xc2e65190 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub053:1664487:1664568 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub053:1664487:1664487 [1] NCCL INFO comm 0x506110d0 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub031:1921204:1921227 [0] NCCL INFO comm 0xb63f1750 rank 8 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 9] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 7] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 10] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 8] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 17] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 18] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub036:1870498:1870586 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub036:1870498:1870518 [0] NCCL INFO comm 0x50c66a10 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 16] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 19] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub036:1870499:1870587 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub036:1870499:1870519 [0] NCCL INFO comm 0xa269c50 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub036:1870497:1870521 [0] NCCL INFO comm 0x4fcaadc0 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 41] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800142 milliseconds before timing out.
+[W ProcessGroupNCCL.cpp:948] [Rank 2] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub015:879782:879859 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub015:879782:879802 [0] NCCL INFO comm 0x502ad7c0 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 17] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 42] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 40] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 43] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 46] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 45] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub053:1664489:1664567 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub053:1664488:1664566 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 18] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub053:1664489:1664509 [0] NCCL INFO comm 0xa9e28fe0 rank 43 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub036:1870496:1870520 [0] NCCL INFO comm 0xad17bd0 rank 16 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub053:1664488:1664511 [0] NCCL INFO comm 0xe3027a0 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub078:4170391:4170477 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub078:4170390:4170479 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub078:4170390:4170413 [0] NCCL INFO comm 0x1d97f440 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub078:4170391:4170416 [0] NCCL INFO comm 0x5187a990 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 19. Original reason for failure was: [Rank 19] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 2] Found key in
store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 16] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub053:1664486:1664508 [0] NCCL INFO comm 0x4f7ecd60 rank 40 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File 
"/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 42] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 46] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 45] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 43] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 40] Found key in store: NCCLABORTEDCOMM:20b41dac1c17730000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+srun: error: gpub052: task 9: Exited with exit code 1
+srun: error: gpub026: task 1: Exited with exit code 1
+srun: error: gpub080: task 13: Exited with exit code 1
+srun: error: gpub079: task 12: Exited with exit code 1
+srun: error: gpub015: task 0: Exited with exit code 1
+srun: error: gpub081: task 14: Exited with exit code 1
+srun: error: gpub082: task 15: Exited with exit code 1
+srun: error: gpub032: task 3: Exited with exit code 1
+srun: error: gpub050: task 7: Exited with exit code 1
+srun: error: gpub037: task 5: Exited with exit code 1
+srun: error: gpub049: task 6: Exited with exit code 1
+srun: error: gpub078: task 11: Exited with exit code 1
+srun: error: gpub051: task 8: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+srun: error: gpub036: task 4: Exited with exit code 1
+srun: error: gpub053: task 10: Exited with exit code 1
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.9.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.9.log
new file mode 100644
index 0000000000000000000000000000000000000000..1e9a300c024da9605a228fbeaf2681e3f144b313
--- /dev/null
+++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.9.log
@@ -0,0 +1,4247 @@
+# Running on gpub001.delta.ncsa.illinois.edu
+# Started at Mon Jul 3 22:24:10 CDT 2023
+# SLURMD_NODENAME=gpub001
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2121665
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x16)'
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2121665
+# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpub[001-002,015-016,022,030-032,059-060,066-067,076-077,079,096]'
+# SLURM_JOB_NUM_NODES=16
+# SLURM_JOB_PARTITION=gpuA40x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_NNODES=16
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpub[001-002,015-016,022,030-032,059-060,066-067,076-077,079,096]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1
+# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu
+# SLURM_TASKS_PER_NODE='1(x16)'
+# SLURM_TASK_PID=383686
+# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub001
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_cc45b274-aa68-4d2c-943c-66da258b53f0
--train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_cc45b274-aa68-4d2c-943c-66da258b53f0 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config 
conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 
/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape 
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_cc45b274-aa68-4d2c-943c-66da258b53f0
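Each --train_data_path_and_name_and_type value in the command above is a comma-separated path,name,type triple (e.g. exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark), and each occurrence is paired positionally with a --train_shape_file. Below is a minimal sketch of how such repeated triples can be collected and split; it is an illustration only, not ESPnet's actual argument parser.

    import argparse

    # Illustration only: mimics the repeated path,name,type flags seen in this log.
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_data_path_and_name_and_type", action="append", default=[])
    parser.add_argument("--train_shape_file", action="append", default=[])
    args = parser.parse_args([
        "--train_data_path_and_name_and_type", "exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark",
        "--train_shape_file", "exp/s2t_stats_raw_bpe50000/splits10/speech_shape",
    ])
    for triple in args.train_data_path_and_name_and_type:
        # e.g. ("exp/.../wav.scp", "speech", "kaldi_ark"): file path, dataset field name, loader type
        path, name, dtype = triple.split(",")
        print(f"dataset field {name!r}: type={dtype}, source={path}")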
+[gpub001:0/64] 2023-07-03 22:27:37,296 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub001:0/64] 2023-07-03 22:27:37,982 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
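The store-based barrier above completes once all 64 ranks (16 nodes x 4 GPUs, per --ngpu 4) have joined the file-backed store named by --dist_init_method. A minimal sketch of the equivalent raw PyTorch call follows; the rank, world size, and shortened file path are illustrative assumptions, since espnet2 derives the real values from the SLURM environment, and NCCL is assumed as the usual multi-GPU backend.

    import torch.distributed as dist

    # Sketch of the file-based rendezvous this run uses (the full file:// path
    # appears in the command line above). RANK/WORLD_SIZE are illustrative;
    # the real job computes them from SLURM variables.
    dist.init_process_group(
        backend="nccl",                                   # assumed; typical for GPU training
        init_method="file:///path/to/exp_dir/.dist_init_cc45b274-aa68-4d2c-943c-66da258b53f0",
        world_size=64,                                    # 16 nodes x 4 GPUs, matching the log
        rank=0,                                           # this process's global rank
    )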
+[gpub001:0/64] 2023-07-03 22:27:38,012 (s2t:483) INFO: Vocabulary size: 50002 +[gpub001:0/64] 2023-07-03 22:27:54,659 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub001:0/64] 2023-07-03 22:27:54,668 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub001:0/64] 2023-07-03 22:27:54,668 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub001:0/64] 2023-07-03 22:27:54,669 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub001:0/64] 2023-07-03 22:27:54,674 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub001:0/64] 2023-07-03 22:27:55,384 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub001:0/64] 2023-07-03 22:28:02,397 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 22:28:02,589 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-03 22:28:02,589 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/64] 2023-07-03 22:28:02,599 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub001:0/64] 2023-07-03 22:28:03,079 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 22:28:03,400 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-03 22:28:03,400 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/64] 2023-07-03 22:28:03,400 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
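Two of the numbers above can be cross-checked directly: the reported size follows from 888.51 M float32 parameters, and the optimizer's current lr follows from the warmup schedule. The sketch below assumes ESPnet's Noam-style WarmupLR formula, lr = base_lr * warmup_steps^0.5 * min(step^-0.5, step * warmup_steps^-1.5); it is an illustration, not part of the log.

    # Sanity checks for the model summary and optimizer state above
    # (a sketch; the WarmupLR formula is an assumption taken from
    # ESPnet's WarmupLR scheduler, not something printed in this log).
    n_params = 888.51e6                      # total parameters reported
    print(f"{n_params * 4 / 1e9:.2f} GB")    # float32 = 4 bytes -> "3.55 GB"

    base_lr, warmup_steps = 2.5e-4, 10000
    def warmup_lr(step):
        return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
    print(warmup_lr(1))             # 2.5e-08, the "lr" shown in Parameter Group 0
    print(warmup_lr(warmup_steps))  # peaks at 2.5e-04 (= initial_lr) at step 10000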
+[gpub001:0/64] 2023-07-03 22:28:31,184 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub001:383774:383774 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:383774:383774 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:383774:383774 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub001:0/64] 2023-07-03 22:28:36,765 (trainer:284) INFO: 9/100epoch started
+[gpub001:0/64] 2023-07-03 22:28:36,808 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-03 22:28:58,125 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 22:29:02,284 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-03 22:29:02,284 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3,
+[gpub001:0/64] 2023-07-03 22:29:02,292 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+gpub022:3399536:3399536 [2] NCCL INFO cudaDriverVersion 12010
+gpub022:3399536:3399536 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0>
+gpub022:3399536:3399536 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub022:3399536:3399614 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0>
+gpub022:3399536:3399614 [2] NCCL INFO Using network IB
+gpub022:3399536:3399614 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub022:3399536:3399614 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17
+gpub022:3399536:3399614 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub022:3399536:3399614 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub022:3399536:3399614 [2] NCCL INFO Connected all rings
+gpub022:3399536:3399614 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub022:3399536:3399614 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub022:3399536:3399614 [2] NCCL INFO Connected all trees
+gpub022:3399536:3399614 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:3399536:3399614 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:3399536:3399614 [2] NCCL INFO comm 0x93f2210 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub022:3399535:3399535 [1] NCCL INFO cudaDriverVersion 12010
+gpub022:3399535:3399535 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0>
+gpub022:3399535:3399535 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub022:3399535:3399615 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0>
+gpub022:3399535:3399615 [1] NCCL INFO Using network IB
+gpub022:3399535:3399615 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub022:3399535:3399615 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpub022:3399535:3399615 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Connected all rings
+gpub022:3399535:3399615 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpub022:3399535:3399615 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpub002:1756559:1756559 [0] NCCL INFO cudaDriverVersion 12010
+gpub002:1756559:1756559 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:1756559:1756559 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:1756559:1756638 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:1756559:1756638 [0] NCCL INFO Using network IB
+gpub002:1756559:1756638 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub002:1756559:1756638 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpub002:1756559:1756638 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub002:1756559:1756638 [0] NCCL INFO Connected all rings
+gpub060:1938145:1938145 [2] NCCL INFO cudaDriverVersion 12010
+gpub060:1938145:1938145 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0>
+gpub060:1938145:1938145 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub060:1938145:1938218 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.160<0>
+gpub060:1938145:1938218 [2] NCCL INFO Using network IB
+gpub060:1938145:1938218 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub060:1938145:1938218 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpub060:1938145:1938218 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub060:1938145:1938218 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub060:1938145:1938218 [2] NCCL INFO Connected all rings
+gpub060:1938145:1938218 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub060:1938145:1938218 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Connected all trees
+gpub022:3399535:3399615 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:3399535:3399615 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:3399535:3399615 [1] NCCL INFO comm 0x4fa312f0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Connected all trees
+gpub002:1756559:1756638 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:1756559:1756638 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:1756559:1756638 [0] NCCL INFO comm 0x51930090 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub060:1938145:1938218 [2] NCCL INFO Connected all trees
+gpub060:1938145:1938218 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub060:1938145:1938218 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub060:1938145:1938218 [2] NCCL INFO comm 0xb591e2d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub022:3399534:3399534 [0] NCCL INFO cudaDriverVersion 12010
+gpub022:3399534:3399534 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0>
+gpub022:3399534:3399534 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub022:3399534:3399616 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0>
+gpub022:3399534:3399616 [0] NCCL INFO Using network IB
+gpub022:3399534:3399616 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub022:3399534:3399616 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub022:3399534:3399616 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub022:3399534:3399616 [0] NCCL INFO Connected all rings
+gpub002:1756561:1756561 [2] NCCL INFO cudaDriverVersion 12010
+gpub002:1756561:1756561 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:1756561:1756561 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:1756561:1756637 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:1756561:1756637 [2] NCCL INFO Using network IB
+gpub002:1756561:1756637 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub002:1756561:1756637 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpub002:1756561:1756637 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub002:1756561:1756637 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub002:1756561:1756637 [2] NCCL INFO Connected all rings
+gpub002:1756561:1756637 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub002:1756561:1756637 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub022:3399534:3399616 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Connected all trees
+gpub022:3399534:3399616 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:3399534:3399616 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:3399534:3399616 [0] NCCL INFO comm 0x50711f50 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub002:1756561:1756637 [2] NCCL INFO Connected all trees
+gpub002:1756561:1756637 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:1756561:1756637 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:1756561:1756637 [2] NCCL INFO comm 0x51ad54d0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub002:1756562:1756562 [3] NCCL INFO cudaDriverVersion 12010
+gpub002:1756562:1756562 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:1756562:1756562 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:1756562:1756636 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:1756562:1756636 [3] NCCL INFO Using network IB
+gpub002:1756562:1756636 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub002:1756562:1756636 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpub002:1756562:1756636 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub002:1756562:1756636 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub002:1756562:1756636 [3] NCCL INFO Connected all rings
+gpub002:1756562:1756636 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub002:1756562:1756636 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub002:1756562:1756636 [3] NCCL INFO Connected all trees
+gpub002:1756562:1756636 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:1756562:1756636 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:1756562:1756636 [3] NCCL INFO comm 0x9ca8ab90 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub002:1756560:1756560 [1] NCCL INFO cudaDriverVersion 12010
+gpub002:1756560:1756560 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:1756560:1756560 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:1756560:1756635 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:1756560:1756635 [1] NCCL INFO Using network IB
+gpub002:1756560:1756635 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub002:1756560:1756635 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpub002:1756560:1756635 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub002:1756560:1756635 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub002:1756560:1756635 [1] NCCL INFO Connected all rings
+gpub002:1756560:1756635 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpub002:1756560:1756635 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpub002:1756560:1756635 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub002:1756560:1756635 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub002:1756560:1756635 [1] NCCL INFO Connected all trees
+gpub002:1756560:1756635 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:1756560:1756635 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:1756560:1756635 [1] NCCL INFO comm 0x17829840 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
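The NCCL "Trees" lines in these blocks describe the per-channel tree topology; each field reads child0/child1/child2->rank->parent, with -1 marking an absent link (so rank 18 above has child 19 and parent 17 on both channels). A throwaway parser for eyeballing this while debugging; the helper name is ours and the field layout is assumed from NCCL's log format:

    import re

    # Parse "... NCCL INFO Trees [0] 19/-1/-1->18->17 [1] ..." into
    # {channel: (children, rank, parent)}; a debugging-aid sketch, assuming
    # NCCL's child0/child1/child2->rank->parent layout (hypothetical helper).
    def parse_trees(line):
        out = {}
        for ch, c0, c1, c2, rank, parent in re.findall(
            r"\[(\d+)\] (-?\d+)/(-?\d+)/(-?\d+)->(-?\d+)->(-?\d+)", line
        ):
            children = [int(c) for c in (c0, c1, c2) if int(c) != -1]
            out[int(ch)] = (children, int(rank), int(parent))
        return out

    line = "gpub022:3399536:3399614 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17"
    print(parse_trees(line))  # {0: ([19], 18, 17), 1: ([19], 18, 17)}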
+gpub032:3246893:3246893 [1] NCCL INFO cudaDriverVersion 12010
+gpub032:3246893:3246893 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0>
+gpub032:3246893:3246893 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub032:3246893:3246975 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0>
+gpub032:3246893:3246975 [1] NCCL INFO Using network IB
+gpub032:3246893:3246975 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub032:3246893:3246975 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpub032:3246893:3246975 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub032:3246893:3246975 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub032:3246893:3246975 [1] NCCL INFO Connected all rings
+gpub032:3246893:3246975 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpub032:3246893:3246975 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpub032:3246893:3246975 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub032:3246893:3246975 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub032:3246893:3246975 [1] NCCL INFO Connected all trees
+gpub032:3246893:3246975 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub032:3246893:3246975 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub032:3246893:3246975 [1] NCCL INFO comm 0x9a6ad00 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub076:3343845:3343845 [2] NCCL INFO cudaDriverVersion 12010
+gpub076:3343845:3343845 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0>
+gpub076:3343845:3343845 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub076:3343845:3343918 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.176<0>
+gpub076:3343845:3343918 [2] NCCL INFO Using network IB
+gpub076:3343845:3343918 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub076:3343845:3343918 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49
+gpub076:3343845:3343918 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC
+gpub076:3343845:3343918 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC
+gpub076:3343845:3343918 [2] NCCL INFO Connected all rings
+gpub076:3343845:3343918 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC
+gpub076:3343845:3343918 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC
+gpub076:3343845:3343918 [2] NCCL INFO Connected all trees
+gpub076:3343845:3343918 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub076:3343845:3343918 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub076:3343845:3343918 [2] NCCL INFO comm 0x4fe2ad90 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub022:3399537:3399537 [3] NCCL INFO cudaDriverVersion 12010
+gpub022:3399537:3399537 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0>
+gpub022:3399537:3399537 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub022:3399537:3399617 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0>
+gpub022:3399537:3399617 [3] NCCL INFO Using network IB
+gpub022:3399537:3399617 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub022:3399537:3399617 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpub022:3399537:3399617 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub022:3399537:3399617 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub022:3399537:3399617 [3] NCCL INFO Connected all rings
+gpub022:3399537:3399617 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub022:3399537:3399617 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub022:3399537:3399617 [3] NCCL INFO Connected all trees
+gpub022:3399537:3399617 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:3399537:3399617 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:3399537:3399617 [3] NCCL INFO comm 0x50214710 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub077:252892:252892 [0] NCCL INFO cudaDriverVersion 12010
+gpub077:252892:252892 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0>
+gpub077:252892:252892 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub077:252892:252962 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0>
+gpub077:252892:252962 [0] NCCL INFO Using network IB
+gpub077:252892:252962 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub077:252892:252962 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45
+gpub077:252892:252962 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub077:252892:252962 [0] NCCL INFO Connected all rings
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Connected all trees
+gpub077:252892:252962 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub077:252892:252962 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub077:252892:252962 [0] NCCL INFO comm 0x97aafd0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub015:828881:828881 [3] NCCL INFO cudaDriverVersion 12010
+gpub015:828881:828881 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:828881:828881 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:828881:828953 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:828881:828953 [3] NCCL INFO Using network IB
+gpub015:828881:828953 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub015:828881:828953 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpub015:828881:828953 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub015:828881:828953 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub015:828881:828953 [3] NCCL INFO Connected all rings
+gpub015:828881:828953 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub015:828881:828953 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub015:828881:828953 [3] NCCL INFO Connected all trees
+gpub015:828881:828953 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub015:828881:828953 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:828881:828953 [3] NCCL INFO comm 0xb64dad10 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub076:3343846:3343846 [3] NCCL INFO cudaDriverVersion 12010
+gpub076:3343846:3343846 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0>
+gpub076:3343846:3343846 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub076:3343846:3343921 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.176<0>
+gpub076:3343846:3343921 [3] NCCL INFO Using network IB
+gpub076:3343846:3343921 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub076:3343846:3343921 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50
+gpub076:3343846:3343921 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub076:3343846:3343921 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub076:3343846:3343921 [3] NCCL INFO Connected all rings
+gpub076:3343846:3343921 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub076:3343846:3343921 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub076:3343846:3343921 [3] NCCL INFO Connected all trees
+gpub076:3343846:3343921 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub076:3343846:3343921 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub076:3343846:3343921 [3] NCCL INFO comm 0x50888c10 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub067:1390513:1390513 [0] NCCL INFO cudaDriverVersion 12010
+gpub067:1390513:1390513 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1390513:1390513 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1390513:1390587 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1390513:1390587 [0] NCCL INFO Using network IB
+gpub067:1390513:1390587 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub067:1390513:1390587 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpub067:1390513:1390587 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub067:1390513:1390587 [0] NCCL INFO Connected all rings
+gpub067:1390513:1390587 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Connected all trees
+gpub067:1390513:1390587 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1390513:1390587 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1390513:1390587 [0] NCCL INFO comm 0x4ef73970 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub015:828878:828878 [0] NCCL INFO cudaDriverVersion 12010
+gpub015:828878:828878 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:828878:828878 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:828878:828950 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:828878:828950 [0] NCCL INFO Using network IB
+gpub015:828878:828950 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub015:828878:828950 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC
+gpub015:828878:828950 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC
+gpub015:828878:828950 [0] NCCL INFO Connected all rings
+gpub015:828878:828950 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Connected all trees
+gpub015:828878:828950 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub015:828878:828950 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:828878:828950 [0] NCCL INFO comm 0x8fc63100 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub015:828879:828879 [1] NCCL INFO cudaDriverVersion 12010
+gpub015:828879:828879 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:828879:828879 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:828879:828952 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:828879:828952 [1] NCCL INFO Using network IB
+gpub015:828879:828952 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub015:828879:828952 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8
+gpub015:828879:828952 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC
+gpub015:828879:828952 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC
+gpub015:828879:828952 [1] NCCL INFO Connected all rings
+gpub015:828879:828952 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0
+gpub015:828879:828952 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0
+gpub015:828879:828952 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC
+gpub015:828879:828952 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC
+gpub015:828879:828952 [1] NCCL INFO Connected all trees
+gpub015:828879:828952 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub015:828879:828952 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:828879:828952 [1] NCCL INFO comm 0x8ad4b90 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub076:3343843:3343843 [0] NCCL INFO cudaDriverVersion 12010
+gpub076:3343843:3343843 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0>
+gpub076:3343843:3343843 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub076:3343843:3343919 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.176<0>
+gpub076:3343843:3343919 [0] NCCL INFO Using network IB
+gpub076:3343843:3343919 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub076:3343843:3343919 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub076:3343843:3343919 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub076:3343843:3343919 [0] NCCL INFO Connected all rings
+gpub076:3343843:3343919 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Connected all trees
+gpub076:3343843:3343919 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub076:3343843:3343919 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub076:3343843:3343919 [0] NCCL INFO comm 0x508de3f0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub076:3343844:3343844 [1] NCCL INFO cudaDriverVersion 12010
+gpub076:3343844:3343844 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0>
+gpub076:3343844:3343844 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub076:3343844:3343920 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.176<0>
+gpub076:3343844:3343920 [1] NCCL INFO Using network IB
+gpub076:3343844:3343920 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub076:3343844:3343920 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48
+gpub076:3343844:3343920 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub076:3343844:3343920 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub076:3343844:3343920 [1] NCCL INFO Connected all rings
+gpub076:3343844:3343920 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0
+gpub076:3343844:3343920 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0
+gpub076:3343844:3343920 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub076:3343844:3343920 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub076:3343844:3343920 [1] NCCL INFO Connected all trees
+gpub076:3343844:3343920 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub076:3343844:3343920 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub076:3343844:3343920 [1] NCCL INFO comm 0xb838ee00 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub015:828880:828880 [2] NCCL INFO cudaDriverVersion 12010
+gpub015:828880:828880 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:828880:828880 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:828880:828951 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:828880:828951 [2] NCCL INFO Using network IB
+gpub015:828880:828951 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub015:828880:828951 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9
+gpub015:828880:828951 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC
+gpub015:828880:828951 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC
+gpub015:828880:828951 [2] NCCL INFO Connected all rings
+gpub015:828880:828951 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC
+gpub015:828880:828951 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC
+gpub015:828880:828951 [2] NCCL INFO Connected all trees
+gpub015:828880:828951 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub015:828880:828951 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:828880:828951 [2] NCCL INFO comm 0x9e67ed0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub001:383775:383775 [1] NCCL INFO cudaDriverVersion 12010
+gpub001:383775:383775 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:383775:383775 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:383775:383855 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:383775:383855 [1] NCCL INFO Using network IB
+gpub001:383775:383855 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub001:383775:383855 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpub001:383775:383855 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub001:383775:383855 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub001:383775:383855 [1] NCCL INFO Connected all rings
+gpub001:383775:383855 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub001:383775:383855 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub001:383775:383855 [1] NCCL INFO Connected all trees
+gpub001:383775:383855 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub001:383775:383855 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:383775:383855 [1] NCCL INFO comm 0x8e376a10 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub001:383776:383776 [2] NCCL INFO cudaDriverVersion 12010
+gpub001:383776:383776 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:383776:383776 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:383776:383853 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:383776:383853 [2] NCCL INFO Using network IB
+gpub001:383776:383853 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub001:383776:383853 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
+gpub001:383776:383853 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub001:383776:383853 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub001:383776:383853 [2] NCCL INFO Connected all rings
+gpub001:383776:383853 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub001:383776:383853 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub001:383776:383853 [2] NCCL INFO Connected all trees
+gpub079:2616803:2616803 [0] NCCL INFO cudaDriverVersion 12010
+gpub079:2616803:2616803 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0>
+gpub079:2616803:2616803 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub079:2616803:2616883 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0>
+gpub079:2616803:2616883 [0] NCCL INFO Using network IB
+gpub079:2616803:2616883 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub079:2616803:2616883 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub079:2616803:2616883 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub079:2616803:2616883 [0] NCCL INFO Connected all rings
+gpub001:383776:383853 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub001:383776:383853 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:383776:383853 [2] NCCL INFO comm 0xa0c5f40 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub079:2616803:2616883 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Connected all trees
+gpub079:2616803:2616883 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub079:2616803:2616883 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub079:2616803:2616883 [0] NCCL INFO comm 0xa9779a50 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub067:1390515:1390515 [2] NCCL INFO cudaDriverVersion 12010
+gpub067:1390515:1390515 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1390515:1390515 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1390515:1390586 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1390515:1390586 [2] NCCL INFO Using network IB
+gpub067:1390515:1390586 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub067:1390515:1390586 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpub067:1390515:1390586 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub067:1390515:1390586 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub067:1390515:1390586 [2] NCCL INFO Connected all rings
+gpub067:1390515:1390586 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub067:1390515:1390586 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub067:1390515:1390586 [2] NCCL INFO Connected all trees
+gpub067:1390515:1390586 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1390515:1390586 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1390515:1390586 [2] NCCL INFO comm 0x5030f0d0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
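Reading the prefixes: each line is host:pid:tid followed by the local CUDA device in brackets, and with 4 GPUs per node the global ranks come in consecutive blocks of four per host (gpub001 holds ranks 0-3, gpub002 ranks 4-7, and so on, matching the "comm ... rank N" lines). A small sketch of that mapping; the node ordering in nodelist is an assumption inferred from the ranks observed above, not read from the log:

    # Map a log prefix like "gpub002:... [1]" back to a global rank, assuming
    # consecutive rank blocks per node (illustrative helper, not ESPnet code).
    nodelist = ["gpub001", "gpub002"]  # first hosts of the allocation (assumed order)
    def global_rank(host, local_gpu, gpus_per_node=4):
        return gpus_per_node * nodelist.index(host) + local_gpu

    print(global_rank("gpub002", 1))  # 5, matching "comm ... rank 5 ... cudaDev 1"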
+gpub067:1390514:1390514 [1] NCCL INFO cudaDriverVersion 12010
+gpub067:1390514:1390514 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1390514:1390514 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1390514:1390588 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1390514:1390588 [1] NCCL INFO Using network IB
+gpub067:1390514:1390588 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub067:1390514:1390588 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44
+gpub067:1390514:1390588 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub067:1390514:1390588 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub067:1390514:1390588 [1] NCCL INFO Connected all rings
+gpub067:1390514:1390588 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0
+gpub067:1390514:1390588 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0
+gpub077:252893:252893 [1] NCCL INFO cudaDriverVersion 12010
+gpub077:252893:252893 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0>
+gpub077:252893:252893 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub077:252893:252961 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0>
+gpub077:252893:252961 [1] NCCL INFO Using network IB
+gpub077:252893:252961 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub077:252893:252961 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52
+gpub077:252893:252961 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC
+gpub077:252893:252961 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC
+gpub077:252893:252961 [1] NCCL INFO Connected all rings
+gpub077:252893:252961 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0
+gpub077:252893:252961 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0
+gpub067:1390514:1390588 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub067:1390514:1390588 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub067:1390514:1390588 [1] NCCL INFO Connected all trees
+gpub067:1390514:1390588 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1390514:1390588 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1390514:1390588 [1] NCCL INFO comm 0xa70b75d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub077:252893:252961 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC
+gpub077:252893:252961 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC
+gpub077:252893:252961 [1] NCCL INFO Connected all trees
+gpub077:252893:252961 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub077:252893:252961 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub077:252893:252961 [1] NCCL INFO comm 0x509e6280 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub016:1380823:1380823 [2] NCCL INFO cudaDriverVersion 12010
+gpub016:1380823:1380823 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0>
+gpub016:1380823:1380823 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub016:1380823:1380896 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0>
+gpub016:1380823:1380896 [2] NCCL INFO Using network IB
+gpub016:1380823:1380896 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub016:1380823:1380896 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpub016:1380823:1380896 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub016:1380823:1380896 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub016:1380823:1380896 [2] NCCL INFO Connected all rings
+gpub016:1380823:1380896 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub016:1380823:1380896 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub016:1380823:1380896 [2] NCCL INFO Connected all trees
+gpub016:1380823:1380896 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub016:1380823:1380896 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub016:1380823:1380896 [2] NCCL INFO comm 0x517fee10 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub032:3246894:3246894 [2] NCCL INFO cudaDriverVersion 12010
+gpub032:3246894:3246894 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0>
+gpub032:3246894:3246894 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub032:3246894:3246976 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0>
+gpub032:3246894:3246976 [2] NCCL INFO Using network IB
+gpub032:3246894:3246976 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub032:3246894:3246976 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpub032:3246894:3246976 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub032:3246894:3246976 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub032:3246894:3246976 [2] NCCL INFO Connected all rings
+gpub032:3246894:3246976 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub032:3246894:3246976 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub032:3246894:3246976 [2] NCCL INFO Connected all trees
+gpub032:3246894:3246976 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub032:3246894:3246976 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub032:3246894:3246976 [2] NCCL INFO comm 0x9ddee7e0 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub001:383777:383777 [3] NCCL INFO cudaDriverVersion 12010
+gpub001:383777:383777 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:383777:383777 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:383777:383854 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:383777:383854 [3] NCCL INFO Using network IB
+gpub001:383777:383854 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub001:383777:383854 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub001:383777:383854 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub001:383777:383854 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub001:383777:383854 [3] NCCL INFO Connected all rings
+gpub001:383777:383854 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub001:383777:383854 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub001:383777:383854 [3] NCCL INFO Connected all trees
+gpub001:383777:383854 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub001:383777:383854 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:383777:383854 [3] NCCL INFO comm 0xc243d9d0 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub030:2310659:2310659 [2] NCCL INFO cudaDriverVersion 12010
+gpub030:2310659:2310659 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2310659:2310659 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2310659:2310728 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2310659:2310728 [2] NCCL INFO Using network IB
+gpub030:2310659:2310728 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub030:2310659:2310728 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpub030:2310659:2310728 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2310659:2310728 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2310659:2310728 [2] NCCL INFO Connected all rings
+gpub030:2310659:2310728 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2310659:2310728 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2310659:2310728 [2] NCCL INFO Connected all trees
+gpub030:2310659:2310728 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2310659:2310728 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2310659:2310728 [2] NCCL INFO comm 0x8de12f60 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub030:2310657:2310657 [0] NCCL INFO cudaDriverVersion 12010
+gpub030:2310657:2310657 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2310657:2310657 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2310657:2310726 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2310657:2310726 [0] NCCL INFO Using network IB
+gpub030:2310657:2310726 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub030:2310657:2310726 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpub030:2310657:2310726 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2310657:2310726 [0] NCCL INFO Connected all rings
+gpub060:1938143:1938143 [0] NCCL INFO cudaDriverVersion 12010
+gpub060:1938143:1938143 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0>
+gpub060:1938143:1938143 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub060:1938143:1938219 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.160<0>
+gpub060:1938143:1938219 [0] NCCL INFO Using network IB
+gpub060:1938143:1938219 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub060:1938143:1938219 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44
+gpub060:1938143:1938219 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC
+gpub060:1938143:1938219 [0] NCCL INFO Connected all rings
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Connected all trees
+gpub030:2310657:2310726 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2310657:2310726 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2310657:2310726 [0] NCCL INFO comm 0x50d929d0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Connected all trees
+gpub060:1938143:1938219 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub060:1938143:1938219 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub060:1938143:1938219 [0] NCCL INFO comm 0x50561020 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub060:1938144:1938144 [1] NCCL INFO cudaDriverVersion 12010
+gpub060:1938144:1938144 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0>
+gpub060:1938144:1938144 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub060:1938144:1938217 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.160<0>
+gpub060:1938144:1938217 [1] NCCL INFO Using network IB
+gpub060:1938144:1938217 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub060:1938144:1938217 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36
+gpub060:1938144:1938217 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC
+gpub060:1938144:1938217 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC
+gpub060:1938144:1938217 [1] NCCL INFO Connected all rings
+gpub060:1938144:1938217 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0
+gpub060:1938144:1938217 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0
+gpub060:1938144:1938217 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC
+gpub060:1938144:1938217 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC
+gpub060:1938144:1938217 [1] NCCL INFO Connected all trees
+gpub060:1938144:1938217 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub060:1938144:1938217 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub060:1938144:1938217 [1] NCCL INFO comm 0x4f3bc650 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub079:2616806:2616806 [3] NCCL INFO cudaDriverVersion 12010
+gpub079:2616806:2616806 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0>
+gpub079:2616806:2616806 [3] NCCL INFO NET/Plugin : No plugin found
(libnccl-net.so), using internal implementation +gpub079:2616806:2616881 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2616806:2616881 [3] NCCL INFO Using network IB +gpub079:2616806:2616881 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub079:2616806:2616881 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub079:2616806:2616881 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub079:2616806:2616881 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub079:2616806:2616881 [3] NCCL INFO Connected all rings +gpub079:2616806:2616881 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub079:2616806:2616881 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub079:2616806:2616881 [3] NCCL INFO Connected all trees +gpub079:2616806:2616881 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2616806:2616881 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2616806:2616881 [3] NCCL INFO comm 0x89762f0 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1432046:1432046 [1] NCCL INFO cudaDriverVersion 12010 +gpub066:1432046:1432046 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1432046:1432046 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1432046:1432129 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1432046:1432129 [1] NCCL INFO Using network IB +gpub066:1432046:1432129 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub066:1432046:1432129 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub066:1432046:1432129 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub066:1432046:1432129 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub066:1432046:1432129 [1] NCCL INFO Connected all rings +gpub066:1432046:1432129 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub066:1432046:1432129 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub066:1432046:1432129 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub066:1432046:1432129 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub066:1432046:1432129 [1] NCCL INFO Connected all trees +gpub066:1432046:1432129 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1432046:1432129 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1432046:1432129 [1] NCCL INFO comm 0x4fabed20 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub066:1432045:1432045 [0] NCCL INFO cudaDriverVersion 12010 +gpub066:1432045:1432045 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1432045:1432045 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1432045:1432128 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1432045:1432128 [0] NCCL INFO Using network IB +gpub066:1432045:1432128 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub066:1432045:1432128 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 40[7000] -> 
41[46000] via P2P/IPC +gpub066:1432045:1432128 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub066:1432045:1432128 [0] NCCL INFO Connected all rings +gpub066:1432045:1432128 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Connected all trees +gpub066:1432045:1432128 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1432045:1432128 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1432045:1432128 [0] NCCL INFO comm 0x50653520 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub016:1380824:1380824 [3] NCCL INFO cudaDriverVersion 12010 +gpub016:1380824:1380824 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:1380824:1380824 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:1380824:1380897 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0> +gpub016:1380824:1380897 [3] NCCL INFO Using network IB +gpub016:1380824:1380897 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub016:1380824:1380897 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub016:1380824:1380897 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub016:1380824:1380897 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub016:1380824:1380897 [3] NCCL INFO Connected all rings +gpub016:1380824:1380897 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub016:1380824:1380897 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub016:1380824:1380897 [3] NCCL INFO Connected all trees +gpub016:1380824:1380897 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub016:1380824:1380897 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:1380824:1380897 [3] NCCL INFO comm 0x8d241cc0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1878314:1878314 [3] NCCL INFO cudaDriverVersion 12010 +gpub031:1878314:1878314 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1878314:1878314 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1878314:1878391 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1878314:1878391 [3] NCCL INFO Using network IB +gpub031:1878314:1878391 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub031:1878314:1878391 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub031:1878314:1878391 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub031:1878314:1878391 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub031:1878314:1878391 [3] NCCL INFO Connected all rings +gpub031:1878314:1878391 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub031:1878314:1878391 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub066:1432048:1432048 [3] NCCL INFO cudaDriverVersion 12010 
+gpub066:1432048:1432048 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1432048:1432048 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1432048:1432126 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1432048:1432126 [3] NCCL INFO Using network IB +gpub066:1432048:1432126 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub066:1432048:1432126 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub066:1432048:1432126 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1432048:1432126 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1432048:1432126 [3] NCCL INFO Connected all rings +gpub066:1432048:1432126 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub066:1432048:1432126 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub031:1878314:1878391 [3] NCCL INFO Connected all trees +gpub031:1878314:1878391 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1878314:1878391 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1878314:1878391 [3] NCCL INFO comm 0x511daaa0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1432048:1432126 [3] NCCL INFO Connected all trees +gpub066:1432048:1432126 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1432048:1432126 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1432048:1432126 [3] NCCL INFO comm 0x51126a70 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub067:1390516:1390516 [3] NCCL INFO cudaDriverVersion 12010 +gpub067:1390516:1390516 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1390516:1390516 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1390516:1390585 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1390516:1390585 [3] NCCL INFO Using network IB +gpub067:1390516:1390585 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub067:1390516:1390585 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub067:1390516:1390585 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub067:1390516:1390585 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub067:1390516:1390585 [3] NCCL INFO Connected all rings +gpub067:1390516:1390585 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub067:1390516:1390585 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub067:1390516:1390585 [3] NCCL INFO Connected all trees +gpub067:1390516:1390585 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub067:1390516:1390585 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub067:1390516:1390585 [3] NCCL INFO comm 0x509fc1c0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1878313:1878313 [2] NCCL INFO cudaDriverVersion 12010 +gpub031:1878313:1878313 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1878313:1878313 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1878313:1878389 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1878313:1878389 [2] NCCL INFO Using network IB +gpub031:1878313:1878389 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub031:1878313:1878389 [2] NCCL INFO Trees [0] 
27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub031:1878313:1878389 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub031:1878313:1878389 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub031:1878313:1878389 [2] NCCL INFO Connected all rings +gpub031:1878313:1878389 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub031:1878313:1878389 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub031:1878313:1878389 [2] NCCL INFO Connected all trees +gpub031:1878313:1878389 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1878313:1878389 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1878313:1878389 [2] NCCL INFO comm 0xa54f400 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub001:383774:383852 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:383774:383852 [0] NCCL INFO Using network IB +gpub001:383774:383852 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub001:383774:383852 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:383774:383852 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:383774:383852 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub001:383774:383852 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:383774:383852 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:383774:383852 [0] NCCL INFO Connected all rings +gpub001:383774:383852 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Connected all trees +gpub001:383774:383852 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:383774:383852 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:383774:383852 [0] NCCL INFO comm 0x9b744f70 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub059:1894384:1894384 [1] NCCL INFO cudaDriverVersion 12010 +gpub059:1894384:1894384 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1894384:1894384 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1894384:1894459 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1894384:1894459 [1] NCCL INFO Using network IB +gpub059:1894384:1894459 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub059:1894384:1894459 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub059:1894384:1894459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub059:1894384:1894459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub059:1894384:1894459 [1] NCCL INFO Connected all rings +gpub059:1894384:1894459 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub059:1894384:1894459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub059:1894384:1894459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 
32[7000] via P2P/IPC +gpub059:1894384:1894459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub059:1894384:1894459 [1] NCCL INFO Connected all trees +gpub059:1894384:1894459 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub059:1894384:1894459 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:1894384:1894459 [1] NCCL INFO comm 0xb7b49460 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub016:1380821:1380821 [0] NCCL INFO cudaDriverVersion 12010 +gpub016:1380821:1380821 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:1380821:1380821 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:1380821:1380899 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0> +gpub016:1380821:1380899 [0] NCCL INFO Using network IB +gpub016:1380821:1380899 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub016:1380821:1380899 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub016:1380821:1380899 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub016:1380821:1380899 [0] NCCL INFO Connected all rings +gpub016:1380821:1380899 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Connected all trees +gpub016:1380821:1380899 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub016:1380821:1380899 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:1380821:1380899 [0] NCCL INFO comm 0x50896990 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub077:252894:252894 [2] NCCL INFO cudaDriverVersion 12010 +gpub077:252894:252894 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:252894:252894 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:252894:252964 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:252894:252964 [2] NCCL INFO Using network IB +gpub077:252894:252964 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub077:252894:252964 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub077:252894:252964 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub077:252894:252964 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub077:252894:252964 [2] NCCL INFO Connected all rings +gpub077:252894:252964 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:252894:252964 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:252894:252964 [2] NCCL INFO Connected all trees +gpub077:252894:252964 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:252894:252964 [2] NCCL 
INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:252894:252964 [2] NCCL INFO comm 0xc19a4b40 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub032:3246892:3246892 [0] NCCL INFO cudaDriverVersion 12010 +gpub032:3246892:3246892 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3246892:3246892 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3246892:3246974 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3246892:3246974 [0] NCCL INFO Using network IB +gpub032:3246892:3246974 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub032:3246892:3246974 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub032:3246892:3246974 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub032:3246892:3246974 [0] NCCL INFO Connected all rings +gpub032:3246892:3246974 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Connected all trees +gpub032:3246892:3246974 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub032:3246892:3246974 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub032:3246892:3246974 [0] NCCL INFO comm 0x4ff3dba0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub079:2616804:2616804 [1] NCCL INFO cudaDriverVersion 12010 +gpub079:2616804:2616804 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2616804:2616804 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2616804:2616880 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2616804:2616880 [1] NCCL INFO Using network IB +gpub079:2616804:2616880 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub079:2616804:2616880 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub079:2616804:2616880 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub079:2616804:2616880 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub079:2616804:2616880 [1] NCCL INFO Connected all rings +gpub079:2616804:2616880 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub079:2616804:2616880 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub079:2616804:2616880 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub079:2616804:2616880 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub079:2616804:2616880 [1] NCCL INFO Connected all trees +gpub079:2616804:2616880 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2616804:2616880 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels 
per peer +gpub079:2616804:2616880 [1] NCCL INFO comm 0x9014adc0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:2616805:2616805 [2] NCCL INFO cudaDriverVersion 12010 +gpub079:2616805:2616805 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2616805:2616805 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2616805:2616882 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2616805:2616882 [2] NCCL INFO Using network IB +gpub079:2616805:2616882 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub079:2616805:2616882 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub079:2616805:2616882 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub079:2616805:2616882 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub079:2616805:2616882 [2] NCCL INFO Connected all rings +gpub079:2616805:2616882 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub079:2616805:2616882 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub079:2616805:2616882 [2] NCCL INFO Connected all trees +gpub079:2616805:2616882 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2616805:2616882 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2616805:2616882 [2] NCCL INFO comm 0x8b2c9c20 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub060:1938146:1938146 [3] NCCL INFO cudaDriverVersion 12010 +gpub060:1938146:1938146 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0> +gpub060:1938146:1938146 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub060:1938146:1938220 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.160<0> +gpub060:1938146:1938220 [3] NCCL INFO Using network IB +gpub060:1938146:1938220 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub060:1938146:1938220 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub060:1938146:1938220 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub060:1938146:1938220 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub060:1938146:1938220 [3] NCCL INFO Connected all rings +gpub060:1938146:1938220 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub060:1938146:1938220 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub060:1938146:1938220 [3] NCCL INFO Connected all trees +gpub060:1938146:1938220 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub060:1938146:1938220 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub060:1938146:1938220 [3] NCCL INFO comm 0x50addeb0 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub016:1380822:1380822 [1] NCCL INFO cudaDriverVersion 12010 +gpub016:1380822:1380822 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:1380822:1380822 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:1380822:1380898 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0> +gpub016:1380822:1380898 [1] NCCL INFO Using network IB +gpub016:1380822:1380898 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub016:1380822:1380898 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub016:1380822:1380898 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub016:1380822:1380898 [1] NCCL INFO Channel 
01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub016:1380822:1380898 [1] NCCL INFO Connected all rings +gpub016:1380822:1380898 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub016:1380822:1380898 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub016:1380822:1380898 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub016:1380822:1380898 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub016:1380822:1380898 [1] NCCL INFO Connected all trees +gpub016:1380822:1380898 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub016:1380822:1380898 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:1380822:1380898 [1] NCCL INFO comm 0x9b8bb7a0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub066:1432047:1432047 [2] NCCL INFO cudaDriverVersion 12010 +gpub066:1432047:1432047 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1432047:1432047 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1432047:1432127 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1432047:1432127 [2] NCCL INFO Using network IB +gpub066:1432047:1432127 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub066:1432047:1432127 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub066:1432047:1432127 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub066:1432047:1432127 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub066:1432047:1432127 [2] NCCL INFO Connected all rings +gpub066:1432047:1432127 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub066:1432047:1432127 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub066:1432047:1432127 [2] NCCL INFO Connected all trees +gpub066:1432047:1432127 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1432047:1432127 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1432047:1432127 [2] NCCL INFO comm 0x9ed0150 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub030:2310658:2310658 [1] NCCL INFO cudaDriverVersion 12010 +gpub030:2310658:2310658 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2310658:2310658 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2310658:2310725 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2310658:2310725 [1] NCCL INFO Using network IB +gpub030:2310658:2310725 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub030:2310658:2310725 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub030:2310658:2310725 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub030:2310658:2310725 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub030:2310658:2310725 [1] NCCL INFO Connected all rings +gpub030:2310658:2310725 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub030:2310658:2310725 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub030:2310658:2310725 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub030:2310658:2310725 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub030:2310658:2310725 [1] NCCL INFO Connected all trees +gpub030:2310658:2310725 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2310658:2310725 [1] NCCL INFO 2 
coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2310658:2310725 [1] NCCL INFO comm 0x50672d50 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub059:1894383:1894383 [0] NCCL INFO cudaDriverVersion 12010 +gpub059:1894383:1894383 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1894383:1894383 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1894383:1894458 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1894383:1894458 [0] NCCL INFO Using network IB +gpub059:1894383:1894458 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub059:1894383:1894458 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub059:1894383:1894458 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub059:1894383:1894458 [0] NCCL INFO Connected all rings +gpub059:1894383:1894458 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Connected all trees +gpub059:1894383:1894458 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub059:1894383:1894458 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:1894383:1894458 [0] NCCL INFO comm 0x510467d0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub059:1894386:1894386 [3] NCCL INFO cudaDriverVersion 12010 +gpub059:1894386:1894386 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1894386:1894386 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1894386:1894456 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1894386:1894456 [3] NCCL INFO Using network IB +gpub059:1894386:1894456 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub059:1894386:1894456 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub059:1894386:1894456 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub059:1894386:1894456 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub059:1894386:1894456 [3] NCCL INFO Connected all rings +gpub059:1894386:1894456 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub059:1894386:1894456 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub059:1894386:1894456 [3] NCCL INFO Connected all trees +gpub059:1894386:1894456 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub059:1894386:1894456 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:1894386:1894456 [3] NCCL INFO comm 0x9cf1390 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub059:1894385:1894385 [2] NCCL INFO cudaDriverVersion 12010 
+gpub059:1894385:1894385 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1894385:1894385 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1894385:1894457 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1894385:1894457 [2] NCCL INFO Using network IB +gpub059:1894385:1894457 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub059:1894385:1894457 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub059:1894385:1894457 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub059:1894385:1894457 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub059:1894385:1894457 [2] NCCL INFO Connected all rings +gpub059:1894385:1894457 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub059:1894385:1894457 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub059:1894385:1894457 [2] NCCL INFO Connected all trees +gpub059:1894385:1894457 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub059:1894385:1894457 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:1894385:1894457 [2] NCCL INFO comm 0x50af3510 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub030:2310660:2310660 [3] NCCL INFO cudaDriverVersion 12010 +gpub030:2310660:2310660 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2310660:2310660 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2310660:2310727 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2310660:2310727 [3] NCCL INFO Using network IB +gpub030:2310660:2310727 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub030:2310660:2310727 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub030:2310660:2310727 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub030:2310660:2310727 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub030:2310660:2310727 [3] NCCL INFO Connected all rings +gpub030:2310660:2310727 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub030:2310660:2310727 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub030:2310660:2310727 [3] NCCL INFO Connected all trees +gpub030:2310660:2310727 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2310660:2310727 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2310660:2310727 [3] NCCL INFO comm 0xa84d3a10 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1878312:1878312 [1] NCCL INFO cudaDriverVersion 12010 +gpub031:1878312:1878312 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1878312:1878312 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1878312:1878390 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1878312:1878390 [1] NCCL INFO Using network IB +gpub031:1878312:1878390 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub031:1878312:1878390 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub031:1878312:1878390 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub031:1878312:1878390 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub031:1878312:1878390 [1] NCCL INFO Connected all rings +gpub031:1878312:1878390 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] 
via NET/IB/0 +gpub031:1878312:1878390 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub031:1878312:1878390 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub031:1878312:1878390 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub031:1878312:1878390 [1] NCCL INFO Connected all trees +gpub031:1878312:1878390 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1878312:1878390 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1878312:1878390 [1] NCCL INFO comm 0x509faf60 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub032:3246895:3246895 [3] NCCL INFO cudaDriverVersion 12010 +gpub032:3246895:3246895 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3246895:3246895 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3246895:3246973 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3246895:3246973 [3] NCCL INFO Using network IB +gpub032:3246895:3246973 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub032:3246895:3246973 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub032:3246895:3246973 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub032:3246895:3246973 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub032:3246895:3246973 [3] NCCL INFO Connected all rings +gpub032:3246895:3246973 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub032:3246895:3246973 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub032:3246895:3246973 [3] NCCL INFO Connected all trees +gpub032:3246895:3246973 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub032:3246895:3246973 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub032:3246895:3246973 [3] NCCL INFO comm 0x1b5e5670 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:252895:252895 [3] NCCL INFO cudaDriverVersion 12010 +gpub077:252895:252895 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:252895:252895 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:252895:252963 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:252895:252963 [3] NCCL INFO Using network IB +gpub077:252895:252963 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub077:252895:252963 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub077:252895:252963 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:252895:252963 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:252895:252963 [3] NCCL INFO Connected all rings +gpub077:252895:252963 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:252895:252963 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:252895:252963 [3] NCCL INFO Connected all trees +gpub077:252895:252963 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:252895:252963 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:252895:252963 [3] NCCL INFO comm 0x9491900 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1878311:1878311 [0] NCCL INFO cudaDriverVersion 12010 +gpub031:1878311:1878311 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1878311:1878311 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), 
using internal implementation +gpub031:1878311:1878392 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1878311:1878392 [0] NCCL INFO Using network IB +gpub031:1878311:1878392 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub031:1878311:1878392 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub031:1878311:1878392 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub031:1878311:1878392 [0] NCCL INFO Connected all rings +gpub031:1878311:1878392 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Connected all trees +gpub031:1878311:1878392 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1878311:1878392 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1878311:1878392 [0] NCCL INFO comm 0xba515710 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub096:1440104:1440104 [3] NCCL INFO cudaDriverVersion 12010 +gpub096:1440104:1440104 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:1440104:1440104 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:1440104:1440176 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0> +gpub096:1440104:1440176 [3] NCCL INFO Using network IB +gpub096:1440104:1440176 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub096:1440104:1440176 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub096:1440104:1440176 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub096:1440104:1440176 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub096:1440104:1440176 [3] NCCL INFO Connected all rings +gpub096:1440104:1440176 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub096:1440104:1440176 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub096:1440104:1440176 [3] NCCL INFO Connected all trees +gpub096:1440104:1440176 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub096:1440104:1440176 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:1440104:1440176 [3] NCCL INFO comm 0x9f265ce0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub096:1440103:1440103 [2] NCCL INFO cudaDriverVersion 12010 +gpub096:1440103:1440103 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:1440103:1440103 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:1440103:1440178 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0> +gpub096:1440103:1440178 [2] NCCL INFO Using network IB +gpub096:1440103:1440178 [2] NCCL INFO Setting 
affinity for GPU 2 to ffff0000 +gpub096:1440103:1440178 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub096:1440103:1440178 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub096:1440103:1440178 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub096:1440103:1440178 [2] NCCL INFO Connected all rings +gpub096:1440103:1440178 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub096:1440103:1440178 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub096:1440103:1440178 [2] NCCL INFO Connected all trees +gpub096:1440103:1440178 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub096:1440103:1440178 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:1440103:1440178 [2] NCCL INFO comm 0x91c6060 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub096:1440101:1440101 [0] NCCL INFO cudaDriverVersion 12010 +gpub096:1440101:1440101 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:1440101:1440101 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:1440101:1440177 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0> +gpub096:1440101:1440177 [0] NCCL INFO Using network IB +gpub096:1440101:1440177 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub096:1440101:1440177 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub096:1440101:1440177 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub096:1440101:1440177 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub096:1440101:1440177 [0] NCCL INFO Connected all rings +gpub096:1440101:1440177 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Connected all trees +gpub096:1440101:1440177 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub096:1440101:1440177 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:1440101:1440177 [0] NCCL INFO comm 0x50b020d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub096:1440102:1440102 [1] NCCL INFO cudaDriverVersion 12010 +gpub096:1440102:1440102 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:1440102:1440102 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:1440102:1440179 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0> +gpub096:1440102:1440179 [1] NCCL INFO Using network IB +gpub096:1440102:1440179 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub096:1440102:1440179 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub096:1440102:1440179 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub096:1440102:1440179 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub096:1440102:1440179 [1] NCCL INFO Connected all rings +gpub096:1440102:1440179 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via 
P2P/IPC
+gpub096:1440102:1440179 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub096:1440102:1440179 [1] NCCL INFO Connected all trees
+gpub096:1440102:1440179 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1440102:1440179 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1440102:1440179 [1] NCCL INFO comm 0x50d96930 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
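The reducer warning above has a direct remedy on the application side: when every parameter reliably receives a gradient in each forward/backward pass, DistributedDataParallel can be constructed with find_unused_parameters=False to skip the extra autograd-graph traversal it describes. The sketch below is a minimal, hypothetical illustration of that constructor argument with a placeholder model, not the ESPnet trainer's actual wiring (ESPnet governs this through its own training options).

import os

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

# Placeholder model standing in for the S2T network.
model = torch.nn.Linear(80, 256)

# Process-group setup as a distributed launcher would provide it
# (env:// rendezvous variables assumed to be set by srun/torchrun).
dist.init_process_group(backend="nccl")
local_rank = int(os.environ.get("LOCAL_RANK", "0"))
torch.cuda.set_device(local_rank)
model = model.cuda(local_rank)

# With no conditionally skipped parameters, find_unused_parameters=False
# avoids the per-iteration graph traversal the warning complains about.
ddp_model = DDP(model, device_ids=[local_rank], find_unused_parameters=False)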
+[gpub001:0/64] 2023-07-03 22:35:20,343 (trainer:732) INFO: 9epoch:train:1-100batch: iter_time=1.479, forward_time=0.254, loss_ctc=89.671, loss_att=65.418, acc=0.668, loss=72.694, backward_time=1.040, grad_norm=92.761, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.336e-04, train_time=8.067
+[gpub001:0/64] 2023-07-03 22:37:35,916 (trainer:732) INFO: 9epoch:train:101-200batch: iter_time=1.205e-04, forward_time=0.142, loss_ctc=77.074, loss_att=59.177, acc=0.644, loss=64.546, backward_time=1.024, grad_norm=98.231, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.335e-04, train_time=2.714
+[gpub001:0/64] 2023-07-03 22:39:51,736 (trainer:732) INFO: 9epoch:train:201-300batch: iter_time=1.193e-04, forward_time=0.141, loss_ctc=83.406, loss_att=66.282, acc=0.665, loss=71.419, backward_time=1.025, grad_norm=105.432, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.334e-04, train_time=2.716
+[gpub001:0/64] 2023-07-03 22:42:06,607 (trainer:732) INFO: 9epoch:train:301-400batch: iter_time=1.219e-04, forward_time=0.142, loss_ctc=70.602, loss_att=53.997, acc=0.653, loss=58.979, backward_time=1.022, grad_norm=85.786, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.333e-04, train_time=2.697
+[gpub001:0/64] 2023-07-03 22:44:49,865 (trainer:732) INFO: 9epoch:train:401-500batch: iter_time=1.219e-04, forward_time=0.142, loss_ctc=84.185, loss_att=67.677, acc=0.649, loss=72.629, backward_time=1.077, grad_norm=101.589, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.332e-04, train_time=3.265
+[gpub001:0/64] 2023-07-03 22:47:04,832 (trainer:732) INFO: 9epoch:train:501-600batch: iter_time=1.129e-04, forward_time=0.140, loss_ctc=77.007, loss_att=63.663, acc=0.649, loss=67.666, backward_time=1.021, grad_norm=119.077, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.331e-04, train_time=2.699
+[gpub001:0/64] 2023-07-03 22:49:20,582 (trainer:732) INFO: 9epoch:train:601-700batch: iter_time=1.158e-04, forward_time=0.142, loss_ctc=69.128, loss_att=50.937, acc=0.670, loss=56.394, backward_time=1.023, grad_norm=85.221, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.330e-04, train_time=2.715
+[gpub001:0/64] 2023-07-03 22:51:38,011 (trainer:732) INFO: 9epoch:train:701-800batch: iter_time=1.240e-04, forward_time=0.142, loss_ctc=87.200, loss_att=70.450, acc=0.661, loss=75.475, backward_time=1.023, grad_norm=107.449, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.329e-04, train_time=2.748
+[gpub001:0/64] 2023-07-03 22:54:40,138 (trainer:732) INFO: 9epoch:train:801-900batch: iter_time=1.192e-04, forward_time=0.142, loss_ctc=84.642, loss_att=62.734, acc=0.671, loss=69.306, backward_time=1.081, grad_norm=87.657, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.328e-04, train_time=3.642
+[gpub001:0/64] 2023-07-03 22:57:27,498 (trainer:732) INFO: 9epoch:train:901-1000batch: iter_time=1.098e-04, forward_time=0.141, loss_ctc=84.316, loss_att=59.445, acc=0.671, loss=66.906, backward_time=1.063, grad_norm=104.559, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.327e-04, train_time=3.347
+[gpub001:0/64] 2023-07-03 22:57:41,932 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
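One relationship worth making explicit in the trainer lines above: the reported loss is a weighted combination of loss_ctc and loss_att, and the logged numbers are consistent with a CTC weight of 0.3 (for the 1-100batch block, 0.3 * 89.671 + 0.7 * 65.418 = 72.694, matching loss=72.694). The weight is inferred here from the log, not read from the training config, so the check below is an assumption-labeled sanity test.

# CTC weight inferred from the logged values; the authoritative number
# lives in the training config, not in this sketch.
CTC_WEIGHT = 0.3

def combined_loss(loss_ctc: float, loss_att: float, w: float = CTC_WEIGHT) -> float:
    # Interpolation that the trainer lines appear to report as "loss=".
    return w * loss_ctc + (1.0 - w) * loss_att

# First block of epoch 9 from the log: loss_ctc=89.671, loss_att=65.418.
assert abs(combined_loss(89.671, 65.418) - 72.694) < 1e-3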
+[gpub001:0/64] 2023-07-03 22:58:04,079 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 22:58:08,258 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-03 22:58:08,259 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4,
+[gpub001:0/64] 2023-07-03 22:58:08,266 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-03 23:04:34,809 (trainer:732) INFO: 9epoch:train:1001-1100batch: iter_time=2.754, forward_time=0.180, loss_ctc=90.046, loss_att=65.751, acc=0.658, loss=73.039, backward_time=1.044, grad_norm=97.475, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.326e-04, train_time=8.546
+[gpub001:0/64] 2023-07-03 23:06:58,236 (trainer:732) INFO: 9epoch:train:1101-1200batch: iter_time=1.332e-04, forward_time=0.144, loss_ctc=77.169, loss_att=57.792, acc=0.640, loss=63.605, backward_time=1.031, grad_norm=84.645, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.325e-04, train_time=2.869
+[gpub001:0/64] 2023-07-03 23:09:20,632 (trainer:732) INFO: 9epoch:train:1201-1300batch: iter_time=1.286e-04, forward_time=0.144, loss_ctc=83.261, loss_att=65.116, acc=0.660, loss=70.559, backward_time=1.033, grad_norm=92.693, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.325e-04, train_time=2.848
+[gpub001:0/64] 2023-07-03 23:11:45,452 (trainer:732) INFO: 9epoch:train:1301-1400batch: iter_time=1.543e-04, forward_time=0.145, loss_ctc=69.216, loss_att=52.051, acc=0.651, loss=57.200, backward_time=1.030, grad_norm=85.186, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.324e-04, train_time=2.896
+[gpub001:0/64] 2023-07-03 23:14:11,669 (trainer:732) INFO: 9epoch:train:1401-1500batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=81.472, loss_att=65.882, acc=0.654, loss=70.559, backward_time=1.051, grad_norm=98.760, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.323e-04, train_time=2.924
+[gpub001:0/64] 2023-07-03 23:16:47,813 (trainer:732) INFO: 9epoch:train:1501-1600batch: iter_time=1.272e-04, forward_time=0.144, loss_ctc=75.580, loss_att=63.756, acc=0.646, loss=67.303, backward_time=1.053, grad_norm=90.582, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.322e-04, train_time=3.123
+[gpub001:0/64] 2023-07-03 23:19:18,487 (trainer:732) INFO: 9epoch:train:1601-1700batch: iter_time=1.166e-04, forward_time=0.144, loss_ctc=70.019, loss_att=51.653, acc=0.666, loss=57.163, backward_time=1.035, grad_norm=88.067, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.321e-04, train_time=3.013
+[gpub001:0/64] 2023-07-03 23:22:07,029 (trainer:732) INFO: 9epoch:train:1701-1800batch: iter_time=1.322e-04, forward_time=0.145, loss_ctc=83.559, loss_att=68.890, acc=0.660, loss=73.291, backward_time=1.058, grad_norm=93.644, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.320e-04, train_time=3.371
+[gpub001:0/64] 2023-07-03 23:24:43,521 (trainer:732) INFO: 9epoch:train:1801-1900batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=82.347, loss_att=62.197, acc=0.665, loss=68.242, backward_time=1.071, grad_norm=81.502, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.319e-04, train_time=3.130
+[gpub001:0/64] 2023-07-03 23:27:16,221 (trainer:732) INFO: 9epoch:train:1901-2000batch: iter_time=1.205e-04, forward_time=0.145, loss_ctc=79.689, loss_att=57.173, acc=0.675, loss=63.928, backward_time=1.053, grad_norm=110.108, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.318e-04, train_time=3.054
+[gpub001:0/64] 2023-07-03 23:27:18,258 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-03 23:27:40,440 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 23:27:44,994 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-03 23:27:44,994 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1,
+[gpub001:0/64] 2023-07-03 23:27:45,002 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-03 23:33:19,007 (trainer:732) INFO: 9epoch:train:2001-2100batch: iter_time=1.581, forward_time=0.193, loss_ctc=87.970, loss_att=63.692, acc=0.678, loss=70.975, backward_time=1.046, grad_norm=89.931, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.192, optim0_lr0=1.317e-04, train_time=7.255
+[gpub001:0/64] 2023-07-03 23:35:35,590 (trainer:732) INFO: 9epoch:train:2101-2200batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=76.465, loss_att=57.377, acc=0.649, loss=63.104, backward_time=1.025, grad_norm=85.685, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.316e-04, train_time=2.732
+[gpub001:0/64] 2023-07-03 23:37:51,391 (trainer:732) INFO: 9epoch:train:2201-2300batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=83.755, loss_att=67.648, acc=0.667, loss=72.480, backward_time=1.025, grad_norm=95.612, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.315e-04, train_time=2.716
+[gpub001:0/64] 2023-07-03 23:40:06,439 (trainer:732) INFO: 9epoch:train:2301-2400batch: iter_time=1.220e-04, forward_time=0.144, loss_ctc=67.251, loss_att=51.411, acc=0.664, loss=56.163, backward_time=1.021, grad_norm=84.349, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.314e-04, train_time=2.701
+[gpub001:0/64] 2023-07-03 23:42:25,815 (trainer:732) INFO: 9epoch:train:2401-2500batch: iter_time=1.184e-04, forward_time=0.145, loss_ctc=81.714, loss_att=66.031, acc=0.656, loss=70.736, backward_time=1.030, grad_norm=98.459, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.313e-04, train_time=2.787
+[gpub001:0/64] 2023-07-03 23:44:44,239 (trainer:732) INFO: 9epoch:train:2501-2600batch: iter_time=1.231e-04, forward_time=0.145, loss_ctc=74.569, loss_att=61.337, acc=0.654, loss=65.307, backward_time=1.031, grad_norm=86.312, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.313e-04, train_time=2.768
+[gpub001:0/64] 2023-07-03 23:47:09,040 (trainer:732) INFO: 9epoch:train:2601-2700batch: iter_time=1.325e-04, forward_time=0.144, loss_ctc=70.692, loss_att=50.739, acc=0.672, loss=56.725, backward_time=1.034, grad_norm=94.955, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.312e-04, train_time=2.896
+[gpub001:0/64] 2023-07-03 23:49:46,981 (trainer:732) INFO: 9epoch:train:2701-2800batch: iter_time=1.268e-04, forward_time=0.146, loss_ctc=82.425, loss_att=66.291, acc=0.670, loss=71.131, backward_time=1.050, grad_norm=96.178, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.311e-04, train_time=3.159
+[gpub001:0/64] 2023-07-03 23:52:17,933 (trainer:732) INFO: 9epoch:train:2801-2900batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=82.507, loss_att=61.707, acc=0.676, loss=67.947, backward_time=1.069, grad_norm=78.205, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.310e-04, train_time=3.019
+[gpub001:0/64] 2023-07-03 23:55:02,472 (trainer:732) INFO: 9epoch:train:2901-3000batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=80.597, loss_att=57.983, acc=0.678, loss=64.767, backward_time=1.050, grad_norm=96.704, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.309e-04, train_time=3.291
+[gpub001:0/64] 2023-07-03 23:55:22,500 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-03 23:55:45,123 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 23:55:49,354 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-03 23:55:49,354 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8,
+[gpub001:0/64] 2023-07-03 23:55:49,431 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 00:00:56,009 (trainer:732) INFO: 9epoch:train:3001-3100batch: iter_time=2.040, forward_time=0.188, loss_ctc=87.894, loss_att=64.088, acc=0.666, loss=71.230, backward_time=1.042, grad_norm=89.421, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.184, optim0_lr0=1.308e-04, train_time=7.070
+[gpub001:0/64] 2023-07-04 00:03:24,637 (trainer:732) INFO: 9epoch:train:3101-3200batch: iter_time=7.802e-04, forward_time=0.237, loss_ctc=73.542, loss_att=54.936, acc=0.650, loss=60.518, backward_time=1.038, grad_norm=83.280, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.190, optim0_lr0=1.307e-04, train_time=2.973
+[gpub001:0/64] 2023-07-04 00:05:45,751 (trainer:732) INFO: 9epoch:train:3201-3300batch: iter_time=1.097e-04, forward_time=0.168, loss_ctc=83.071, loss_att=63.854, acc=0.666, loss=69.619, backward_time=1.029, grad_norm=84.872, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.183, optim0_lr0=1.306e-04, train_time=2.820
+[gpub001:0/64] 2023-07-04 00:08:15,073 (trainer:732) INFO: 9epoch:train:3301-3400batch: iter_time=2.763e-04, forward_time=0.240, loss_ctc=67.308, loss_att=51.039, acc=0.655, loss=55.919, backward_time=1.042, grad_norm=76.085, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.187, optim0_lr0=1.305e-04, train_time=2.988
+[gpub001:0/64] 2023-07-04 00:10:38,777 (trainer:732) INFO: 9epoch:train:3401-3500batch: iter_time=1.431e-04, forward_time=0.181, loss_ctc=81.458, loss_att=64.858, acc=0.657, loss=69.838, backward_time=1.035, grad_norm=85.868, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.185, optim0_lr0=1.305e-04, train_time=2.873
+[gpub001:0/64] 2023-07-04 00:13:07,268 (trainer:732) INFO: 9epoch:train:3501-3600batch: iter_time=4.291e-04, forward_time=0.234, loss_ctc=73.981, loss_att=61.944, acc=0.651, loss=65.555, backward_time=1.036, grad_norm=95.691, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.186, optim0_lr0=1.304e-04, train_time=2.970
+[gpub001:0/64] 2023-07-04 00:15:26,106 (trainer:732) INFO: 9epoch:train:3601-3700batch: iter_time=1.187e-04, forward_time=0.168, loss_ctc=68.552, loss_att=50.700, acc=0.671, loss=56.056, backward_time=1.026, grad_norm=80.833, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.303e-04, train_time=2.775
+[gpub001:0/64] 2023-07-04 00:18:11,878 (trainer:732) INFO: 9epoch:train:3701-3800batch: iter_time=5.008e-04, forward_time=0.250, loss_ctc=81.535, loss_att=67.538, acc=0.661, loss=71.737, backward_time=1.059, grad_norm=101.130, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.188, optim0_lr0=1.302e-04, train_time=3.316
+[gpub001:0/64] 2023-07-04 00:20:43,658 (trainer:732) INFO: 9epoch:train:3801-3900batch: iter_time=5.966e-04, forward_time=0.156, loss_ctc=82.350, loss_att=61.685, acc=0.669, loss=67.885, backward_time=1.048, grad_norm=98.009, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.301e-04, train_time=3.035
+[gpub001:0/64] 2023-07-04 00:23:35,253 (trainer:732) INFO: 9epoch:train:3901-4000batch: iter_time=1.404e-04, forward_time=0.240, loss_ctc=78.814, loss_att=56.564, acc=0.678, loss=63.239, backward_time=1.090, grad_norm=90.393, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.185, optim0_lr0=1.300e-04, train_time=3.431
+[gpub001:0/64] 2023-07-04 00:23:55,438 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
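Each "Building Nth iter-factory..." block above follows the same pattern: the trainer opens one shard of the splits10 training data, dumps the ESPnetDataset file bindings, and builds an UnsortedBatchSampler over that shard's speech_shape key file, yielding 45593 mini-batches of 128-129 utterances (min=128, max=129 suggests the remainder is folded into some batches rather than emitting one short batch). The sketch below is a simplified stand-in for that grouping under exactly that assumption; it is not ESPnet's actual UnsortedBatchSampler.

from pathlib import Path

def unsorted_fixed_batches(key_file: str, batch_size: int = 128):
    # A shape/key file has one "uttid length..." entry per line; only the
    # utterance id matters for grouping here.
    keys = [line.split()[0] for line in Path(key_file).read_text().splitlines() if line]
    n_batch = max(1, len(keys) // batch_size)
    # Spread the remainder over the first batches, so every batch holds
    # batch_size or batch_size + 1 keys (the logged min=128 / max=129).
    base, extra = divmod(len(keys), n_batch)
    batches, start = [], 0
    for i in range(n_batch):
        size = base + (1 if i < extra else 0)
        batches.append(keys[start:start + size])
        start += size
    return batches

# Usage against one of the key files named in the log; under the assumption
# above, len(batches) should come out at the logged N-batch=45593.
# batches = unsorted_fixed_batches("exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4")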
+[gpub001:0/64] 2023-07-04 00:24:17,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 00:24:21,806 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 00:24:21,806 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7,
+[gpub001:0/64] 2023-07-04 00:24:21,829 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 00:31:27,733 (trainer:732) INFO: 9epoch:train:4001-4100batch: iter_time=2.183, forward_time=0.215, loss_ctc=87.545, loss_att=63.219, acc=0.679, loss=70.517, backward_time=1.043, grad_norm=95.106, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.186, optim0_lr0=1.299e-04, train_time=9.450
+[gpub001:0/64] 2023-07-04 00:33:43,494 (trainer:732) INFO: 9epoch:train:4101-4200batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=74.551, loss_att=56.168, acc=0.656, loss=61.683, backward_time=1.024, grad_norm=92.157, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.298e-04, train_time=2.715
+[gpub001:0/64] 2023-07-04 00:35:59,666 (trainer:732) INFO: 9epoch:train:4201-4300batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=82.064, loss_att=63.472, acc=0.678, loss=69.050, backward_time=1.028, grad_norm=98.221, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.297e-04, train_time=2.723
+[gpub001:0/64] 2023-07-04 00:38:15,095 (trainer:732) INFO: 9epoch:train:4301-4400batch: iter_time=1.075e-04, forward_time=0.143, loss_ctc=68.892, loss_att=51.913, acc=0.663, loss=57.007, backward_time=1.023, grad_norm=82.794, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.297e-04, train_time=2.708
+[gpub001:0/64] 2023-07-04 00:40:30,833 (trainer:732) INFO: 9epoch:train:4401-4500batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=82.133, loss_att=65.286, acc=0.663, loss=70.340, backward_time=1.027, grad_norm=97.104, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.296e-04, train_time=2.715
+[gpub001:0/64] 2023-07-04 00:42:58,614 (trainer:732) INFO: 9epoch:train:4501-4600batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=73.526, loss_att=61.357, acc=0.659, loss=65.008, backward_time=1.042, grad_norm=89.909, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.295e-04, train_time=2.955
+[gpub001:0/64] 2023-07-04 00:45:13,932 (trainer:732) INFO: 9epoch:train:4601-4700batch: iter_time=1.179e-04, forward_time=0.144, loss_ctc=69.403, loss_att=50.034, acc=0.676, loss=55.845, backward_time=1.023, grad_norm=93.331, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.294e-04, train_time=2.706
+[gpub001:0/64] 2023-07-04 00:47:42,926 (trainer:732) INFO: 9epoch:train:4701-4800batch: iter_time=1.269e-04, forward_time=0.145, loss_ctc=81.878, loss_att=66.373, acc=0.671, loss=71.025, backward_time=1.048, grad_norm=98.786, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.293e-04, train_time=2.980
+[gpub001:0/64] 2023-07-04 00:50:21,085 (trainer:732) INFO: 9epoch:train:4801-4900batch: iter_time=1.140e-04, forward_time=0.146, loss_ctc=83.277, loss_att=60.906, acc=0.679, loss=67.617, backward_time=1.047, grad_norm=96.167, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.292e-04, train_time=3.163
+[gpub001:0/64] 2023-07-04 00:52:52,099 (trainer:732) INFO: 9epoch:train:4901-5000batch: iter_time=1.094e-04, forward_time=0.146, loss_ctc=79.400, loss_att=56.855, acc=0.683, loss=63.619, backward_time=1.051, grad_norm=92.534, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.291e-04, train_time=3.020
+[gpub001:0/64] 2023-07-04 00:53:12,127 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-04 00:53:34,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 00:53:38,834 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 00:53:38,834 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0,
+[gpub001:0/64] 2023-07-04 00:53:38,842 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 00:59:54,062 (trainer:732) INFO: 9epoch:train:5001-5100batch: iter_time=1.788, forward_time=0.204, loss_ctc=86.818, loss_att=63.626, acc=0.669, loss=70.584, backward_time=1.040, grad_norm=93.196, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.183, optim0_lr0=1.291e-04, train_time=8.438
+[gpub001:0/64] 2023-07-04 01:02:10,587 (trainer:732) INFO: 9epoch:train:5101-5200batch: iter_time=1.492e-04, forward_time=0.146, loss_ctc=76.113, loss_att=56.301, acc=0.651, loss=62.245, backward_time=1.026, grad_norm=83.102, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.183, optim0_lr0=1.290e-04, train_time=2.732
+[gpub001:0/64] 2023-07-04 01:04:27,185 (trainer:732) INFO: 9epoch:train:5201-5300batch: iter_time=1.206e-04, forward_time=0.144, loss_ctc=80.303, loss_att=62.833, acc=0.671, loss=68.074, backward_time=1.028, grad_norm=89.660, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.289e-04, train_time=2.732
+[gpub001:0/64] 2023-07-04 01:06:45,184 (trainer:732) INFO: 9epoch:train:5301-5400batch: iter_time=1.284e-04, forward_time=0.144, loss_ctc=65.878, loss_att=49.861, acc=0.667, loss=54.666, backward_time=1.025, grad_norm=73.201, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.288e-04, train_time=2.760
+[gpub001:0/64] 2023-07-04 01:09:00,951 (trainer:732) INFO: 9epoch:train:5401-5500batch: iter_time=1.160e-04, forward_time=0.144, loss_ctc=80.403, loss_att=63.695, acc=0.661, loss=68.708, backward_time=1.023, grad_norm=99.950, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.287e-04, train_time=2.715
+[gpub001:0/64] 2023-07-04 01:11:27,791 (trainer:732) INFO: 9epoch:train:5501-5600batch: iter_time=1.300e-04, forward_time=0.144, loss_ctc=73.122, loss_att=62.649, acc=0.653, loss=65.791, backward_time=1.033, grad_norm=94.019, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.286e-04, train_time=2.937
+[gpub001:0/64] 2023-07-04 01:13:52,865 (trainer:732) INFO: 9epoch:train:5601-5700batch: iter_time=1.200e-04, forward_time=0.144, loss_ctc=67.757, loss_att=50.360, acc=0.677, loss=55.579, backward_time=1.034, grad_norm=81.435, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.183, optim0_lr0=1.285e-04, train_time=2.901
+[gpub001:0/64] 2023-07-04 01:16:15,288 (trainer:732) INFO: 9epoch:train:5701-5800batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=80.702, loss_att=67.135, acc=0.662, loss=71.205, backward_time=1.033, grad_norm=92.187, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.285e-04, train_time=2.848
+[gpub001:0/64] 2023-07-04 01:19:17,300 (trainer:732) INFO: 9epoch:train:5801-5900batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=81.556, loss_att=61.325, acc=0.670, loss=67.394, backward_time=1.087, grad_norm=98.823, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.284e-04, train_time=3.640
+[gpub001:0/64] 2023-07-04 01:21:54,934 (trainer:732) INFO: 9epoch:train:5901-6000batch: iter_time=1.163e-04, forward_time=0.143, loss_ctc=78.794, loss_att=57.019, acc=0.673, loss=63.552, backward_time=1.039, grad_norm=110.341, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.283e-04, train_time=3.152
+[gpub001:0/64] 2023-07-04 01:22:12,906 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-04 01:22:35,275 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 01:22:39,453 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 01:22:39,453 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2,
+[gpub001:0/64] 2023-07-04 01:22:39,563 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 01:27:58,379 (trainer:732) INFO: 9epoch:train:6001-6100batch: iter_time=2.080, forward_time=0.171, loss_ctc=85.395, loss_att=61.689, acc=0.672, loss=68.801, backward_time=1.043, grad_norm=87.997, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.184, optim0_lr0=1.282e-04, train_time=7.268
+[gpub001:0/64] 2023-07-04 01:30:14,639 (trainer:732) INFO: 9epoch:train:6101-6200batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=73.105, loss_att=54.251, acc=0.658, loss=59.907, backward_time=1.024, grad_norm=81.029, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.281e-04, train_time=2.725
+[gpub001:0/64] 2023-07-04 01:32:30,963 (trainer:732) INFO: 9epoch:train:6201-6300batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=82.012, loss_att=61.684, acc=0.672, loss=67.782, backward_time=1.027, grad_norm=92.039, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.280e-04, train_time=2.726
+[gpub001:0/64] 2023-07-04 01:34:46,436 (trainer:732) INFO: 9epoch:train:6301-6400batch: iter_time=1.161e-04, forward_time=0.144, loss_ctc=68.673, loss_att=51.531, acc=0.660, loss=56.674, backward_time=1.022, grad_norm=84.864, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.280e-04, train_time=2.709
+[gpub001:0/64] 2023-07-04 01:37:02,196 (trainer:732) INFO: 9epoch:train:6401-6500batch: iter_time=1.343e-04, forward_time=0.146, loss_ctc=80.427, loss_att=63.759, acc=0.665, loss=68.760, backward_time=1.026, grad_norm=91.094, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.279e-04, train_time=2.715
+[gpub001:0/64] 2023-07-04 01:39:23,012 (trainer:732) INFO: 9epoch:train:6501-6600batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=71.841, loss_att=62.207, acc=0.654, loss=65.098, backward_time=1.028, grad_norm=91.719, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.278e-04, train_time=2.816
+[gpub001:0/64] 2023-07-04 01:41:50,816 (trainer:732) INFO: 9epoch:train:6601-6700batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=67.160, loss_att=49.622, acc=0.677, loss=54.883, backward_time=1.040, grad_norm=85.772, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.277e-04, train_time=2.956
+[gpub001:0/64] 2023-07-04 01:44:34,992 (trainer:732) INFO: 9epoch:train:6701-6800batch: iter_time=1.203e-04, forward_time=0.146, loss_ctc=80.981, loss_att=67.197, acc=0.664, loss=71.332, backward_time=1.065, grad_norm=96.825, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.276e-04, train_time=3.283
+[gpub001:0/64] 2023-07-04 01:47:19,271 (trainer:732) INFO: 9epoch:train:6801-6900batch: iter_time=1.257e-04, forward_time=0.145, loss_ctc=82.167, loss_att=60.632, acc=0.674, loss=67.092, backward_time=1.082, grad_norm=84.474, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.275e-04, train_time=3.285
+[gpub001:0/64] 2023-07-04 01:50:08,231 (trainer:732) INFO: 9epoch:train:6901-7000batch: iter_time=1.171e-04, forward_time=0.143, loss_ctc=79.723, loss_att=57.262, acc=0.677, loss=64.000, backward_time=1.133, grad_norm=106.009, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.275e-04, train_time=3.379
+[gpub001:0/64] 2023-07-04 01:50:26,658 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
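Since the per-100-batch trainer lines above are the main signal in this log, it can help to pull them into structured form (loss, acc, train_time per window) instead of reading them by eye. The parser below is a small, assumption-labeled helper written against the exact key=value format shown in this file; the field names such as loss_ctc and train_time are taken from the lines above.

import re

# One "key=value" metric, e.g. "loss_ctc=80.427" or "iter_time=1.343e-04".
PAIR = re.compile(r"(\w+)=([0-9.e+-]+)")
# Window header, e.g. "9epoch:train:6401-6500batch:".
HEADER = re.compile(r"(\d+)epoch:train:(\d+)-(\d+)batch:")

def parse_trainer_line(line: str):
    # Returns (epoch, first_batch, last_batch, metrics) for trainer lines,
    # or None for any other record in the log.
    m = HEADER.search(line)
    if m is None or "(trainer:" not in line:
        return None
    tail = line.split("batch:", 1)[1]
    metrics = {k: float(v) for k, v in PAIR.findall(tail)}
    return int(m.group(1)), int(m.group(2)), int(m.group(3)), metrics

line = ("[gpub001:0/64] 2023-07-04 01:37:02,196 (trainer:732) INFO: "
        "9epoch:train:6401-6500batch: iter_time=1.343e-04, forward_time=0.146, "
        "loss_ctc=80.427, loss_att=63.759, acc=0.665, loss=68.760")
print(parse_trainer_line(line))  # (9, 6401, 6500, {'iter_time': 0.0001343, ...})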
+[gpub001:0/64] 2023-07-04 01:50:49,043 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 01:50:53,273 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 01:50:53,273 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5,
+[gpub001:0/64] 2023-07-04 01:50:53,280 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 01:56:44,529 (trainer:732) INFO: 9epoch:train:7001-7100batch: iter_time=1.794, forward_time=0.205, loss_ctc=85.101, loss_att=63.351, acc=0.681, loss=69.876, backward_time=1.054, grad_norm=94.225, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.274e-04, train_time=7.925
+[gpub001:0/64] 2023-07-04 01:59:09,417 (trainer:732) INFO: 9epoch:train:7101-7200batch: iter_time=1.088e-04, forward_time=0.145, loss_ctc=73.666, loss_att=55.956, acc=0.661, loss=61.269, backward_time=1.036, grad_norm=80.604, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.273e-04, train_time=2.898
+[gpub001:0/64] 2023-07-04 02:01:39,261 (trainer:732) INFO: 9epoch:train:7201-7300batch: iter_time=1.157e-04, forward_time=0.144, loss_ctc=81.381, loss_att=63.231, acc=0.677, loss=68.676, backward_time=1.040, grad_norm=84.281, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.272e-04, train_time=2.997
+[gpub001:0/64] 2023-07-04 02:03:59,902 (trainer:732) INFO: 9epoch:train:7301-7400batch: iter_time=1.094e-04, forward_time=0.144, loss_ctc=66.904, loss_att=50.560, acc=0.673, loss=55.463, backward_time=1.028, grad_norm=89.574, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.271e-04, train_time=2.813
+[gpub001:0/64] 2023-07-04 02:06:31,880 (trainer:732) INFO: 9epoch:train:7401-7500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=79.781, loss_att=64.132, acc=0.667, loss=68.827, backward_time=1.043, grad_norm=81.963, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.270e-04, train_time=3.039
+[gpub001:0/64] 2023-07-04 02:09:19,491 (trainer:732) INFO: 9epoch:train:7501-7600batch: iter_time=1.151e-04, forward_time=0.143, loss_ctc=74.243, loss_att=62.088, acc=0.657, loss=65.734, backward_time=1.102, grad_norm=86.931, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.270e-04, train_time=3.352
+[gpub001:0/64] 2023-07-04 02:11:55,587 (trainer:732) INFO: 9epoch:train:7601-7700batch: iter_time=1.161e-04, forward_time=0.142, loss_ctc=67.305, loss_att=48.568, acc=0.681, loss=54.189, backward_time=1.068, grad_norm=80.772, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.269e-04, train_time=3.122
+[gpub001:0/64] 2023-07-04 02:14:33,924 (trainer:732) INFO: 9epoch:train:7701-7800batch: iter_time=1.073e-04, forward_time=0.143, loss_ctc=81.014, loss_att=66.986, acc=0.670, loss=71.195, backward_time=1.054, grad_norm=87.112, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.268e-04, train_time=3.166
+[gpub001:0/64] 2023-07-04 02:17:40,732 (trainer:732) INFO: 9epoch:train:7801-7900batch: iter_time=1.189e-04, forward_time=0.144, loss_ctc=81.461, loss_att=60.824, acc=0.683, loss=67.015, backward_time=1.088, grad_norm=88.042, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.267e-04, train_time=3.736
+[gpub001:0/64] 2023-07-04 02:20:31,309 (trainer:732) INFO: 9epoch:train:7901-8000batch: iter_time=1.118e-04, forward_time=0.143, loss_ctc=77.800, loss_att=55.616, acc=0.685, loss=62.271, backward_time=1.068, grad_norm=96.283, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.266e-04, train_time=3.411
+[gpub001:0/64] 2023-07-04 02:20:50,628 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-04 02:21:12,925 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 02:21:17,185 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 02:21:17,185 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9,
+[gpub001:0/64] 2023-07-04 02:21:17,193 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 02:27:33,747 (trainer:732) INFO: 9epoch:train:8001-8100batch: iter_time=2.310, forward_time=0.216, loss_ctc=86.868, loss_att=62.477, acc=0.682, loss=69.795, backward_time=1.066, grad_norm=93.604, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.186, optim0_lr0=1.265e-04, train_time=8.448
+[gpub001:0/64] 2023-07-04 02:30:47,915 (trainer:732) INFO: 9epoch:train:8101-8200batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=72.041, loss_att=54.309, acc=0.663, loss=59.628, backward_time=1.122, grad_norm=85.541, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.265e-04, train_time=3.884
+[gpub001:0/64] 2023-07-04 02:34:23,550 (trainer:732) INFO: 9epoch:train:8201-8300batch: iter_time=1.110e-04, forward_time=0.143, loss_ctc=80.318, loss_att=62.685, acc=0.682, loss=67.975, backward_time=1.191, grad_norm=91.389, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.264e-04, train_time=4.312
+[gpub001:0/64] 2023-07-04 02:38:10,352 (trainer:732) INFO: 9epoch:train:8301-8400batch: iter_time=1.088e-04, forward_time=0.143, loss_ctc=66.182, loss_att=50.205, acc=0.672, loss=54.998, backward_time=1.197, grad_norm=84.213, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.263e-04, train_time=4.536
+[gpub001:0/64] 2023-07-04 02:41:23,634 (trainer:732) INFO: 9epoch:train:8401-8500batch: iter_time=1.048e-04, forward_time=0.144, loss_ctc=80.035, loss_att=64.016, acc=0.668, loss=68.822, backward_time=1.096, grad_norm=82.939, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.262e-04, train_time=3.865
+[gpub001:0/64] 2023-07-04 02:45:08,691 (trainer:732) INFO: 9epoch:train:8501-8600batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=72.274, loss_att=60.192, acc=0.664, loss=63.816, backward_time=1.123, grad_norm=86.066, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.261e-04, train_time=4.501
+[gpub001:0/64] 2023-07-04 02:48:18,435 (trainer:732) INFO: 9epoch:train:8601-8700batch: iter_time=1.254e-04, forward_time=0.146, loss_ctc=66.300, loss_att=48.163, acc=0.686, loss=53.604, backward_time=1.120, grad_norm=78.350, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.261e-04, train_time=3.795
+[gpub001:0/64] 2023-07-04 02:51:37,339 (trainer:732) INFO: 9epoch:train:8701-8800batch: iter_time=1.333e-04, forward_time=0.146, loss_ctc=80.163, loss_att=63.912, acc=0.678, loss=68.787, backward_time=1.095, grad_norm=86.030, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.260e-04, train_time=3.978
+[gpub001:0/64] 2023-07-04 02:54:31,632 (trainer:732) INFO: 9epoch:train:8801-8900batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=81.677, loss_att=59.992, acc=0.684, loss=66.498, backward_time=1.097, grad_norm=75.414, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.259e-04, train_time=3.486
+[gpub001:0/64] 2023-07-04 02:57:46,255 (trainer:732) INFO: 9epoch:train:8901-9000batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=76.679, loss_att=55.296, acc=0.689, loss=61.711, backward_time=1.165, grad_norm=99.487, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.258e-04, train_time=3.892
+[gpub001:0/64] 2023-07-04 02:58:06,283 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-04 02:58:29,174 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 02:58:33,478 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 02:58:33,478 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6,
+[gpub001:0/64] 2023-07-04 02:58:33,485 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 03:05:00,125 (trainer:732) INFO: 9epoch:train:9001-9100batch: iter_time=1.878, forward_time=0.187, loss_ctc=86.043, loss_att=63.578, acc=0.668, loss=70.318, backward_time=1.044, grad_norm=92.448, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.184, optim0_lr0=1.257e-04, train_time=8.677
+[gpub001:0/64] 2023-07-04 03:07:20,404 (trainer:732) INFO: 9epoch:train:9101-9200batch: iter_time=1.129e-04, forward_time=0.144, loss_ctc=73.663, loss_att=55.071, acc=0.654, loss=60.649, backward_time=1.032, grad_norm=88.403, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.257e-04, train_time=2.806
+[gpub001:0/64] 2023-07-04 03:09:37,950 (trainer:732) INFO: 9epoch:train:9201-9300batch: iter_time=1.218e-04, forward_time=0.143, loss_ctc=80.458, loss_att=62.287, acc=0.676, loss=67.738, backward_time=1.027, grad_norm=92.966, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.256e-04, train_time=2.751
+[gpub001:0/64] 2023-07-04 03:11:53,135 (trainer:732) INFO: 9epoch:train:9301-9400batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=66.752, loss_att=50.047, acc=0.665, loss=55.059, backward_time=1.022, grad_norm=85.205, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.255e-04, train_time=2.703
+[gpub001:0/64] 2023-07-04 03:14:19,601 (trainer:732) INFO: 9epoch:train:9401-9500batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=80.260, loss_att=63.174, acc=0.665, loss=68.300, backward_time=1.039, grad_norm=86.691, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.254e-04, train_time=2.929
+[gpub001:0/64] 2023-07-04 03:17:07,879 (trainer:732) INFO: 9epoch:train:9501-9600batch: iter_time=1.143e-04, forward_time=0.150, loss_ctc=73.284, loss_att=61.761, acc=0.657, loss=65.218, backward_time=1.062, grad_norm=97.161, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.254e-04, train_time=3.365
+[gpub001:0/64] 2023-07-04 03:19:34,610 (trainer:732) INFO: 9epoch:train:9601-9700batch: iter_time=5.961e-04, forward_time=0.161, loss_ctc=66.042, loss_att=48.731, acc=0.682, loss=53.924, backward_time=1.039, grad_norm=76.636, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.253e-04, train_time=2.934
+[gpub001:0/64] 2023-07-04 03:21:58,061 (trainer:732) INFO: 9epoch:train:9701-9800batch: iter_time=1.166e-04, forward_time=0.175, loss_ctc=78.654, loss_att=65.270, acc=0.669, loss=69.285, backward_time=1.036, grad_norm=89.956, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.184, optim0_lr0=1.252e-04, train_time=2.869
+[gpub001:0/64] 2023-07-04 03:24:35,779 (trainer:732) INFO: 9epoch:train:9801-9900batch: iter_time=1.327e-04, forward_time=0.166, loss_ctc=81.981, loss_att=61.082, acc=0.676, loss=67.352, backward_time=1.044, grad_norm=85.085, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.251e-04, train_time=3.154
+[gpub001:0/64] 2023-07-04 03:27:16,735 (trainer:732) INFO: 9epoch:train:9901-10000batch: iter_time=1.079e-04, forward_time=0.171, loss_ctc=77.652, loss_att=56.185, acc=0.682, loss=62.625, backward_time=1.047, grad_norm=92.946, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.250e-04, train_time=3.219
+[gpub001:0/64] 2023-07-04 03:40:25,652 (trainer:338) INFO: 9epoch results: [train] iter_time=0.199, forward_time=0.157, loss_ctc=77.876, loss_att=59.714, acc=0.667, loss=65.163, backward_time=1.051, grad_norm=90.766, clip=100.000, loss_scale=2.233e+10, optim_step_time=0.182, optim0_lr0=1.292e-04, train_time=3.584, time=4 hours, 59 minutes and 3.18 seconds, total_count=60000, gpu_max_cached_mem_GB=34.164, [valid] loss_ctc=58.837, cer_ctc=0.322, loss_att=48.196, acc=0.608, cer=0.461, wer=0.998, loss=51.388, time=6 minutes and 52.63 seconds, total_count=6578, gpu_max_cached_mem_GB=37.459, [att_plot] time=5 minutes and 53.06 seconds, total_count=0, gpu_max_cached_mem_GB=37.459
+[gpub001:0/64] 2023-07-04 03:40:45,004 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpub001:0/64] 2023-07-04 03:40:45,009 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/4epoch.pth
+[gpub001:0/64] 2023-07-04 03:40:45,077 (trainer:272) INFO: 10/100epoch started. Estimated time to finish: 2 weeks, 5 days and 17 hours
+[gpub001:0/64] 2023-07-04 03:40:46,338 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-04 03:41:10,884 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 03:41:15,265 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 03:41:15,265 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0,
+[gpub001:0/64] 2023-07-04 03:41:15,312 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 03:52:28,581 (trainer:732) INFO: 10epoch:train:1-100batch: iter_time=5.563, forward_time=0.206, loss_ctc=74.661, loss_att=61.707, acc=0.668, loss=65.594, backward_time=1.045, grad_norm=92.048, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.187, optim0_lr0=1.250e-04, train_time=14.055
+[gpub001:0/64] 2023-07-04 03:54:52,229 (trainer:732) INFO: 10epoch:train:101-200batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=79.571, loss_att=57.713, acc=0.649, loss=64.270, backward_time=1.039, grad_norm=111.566, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.249e-04, train_time=2.875
+[gpub001:0/64] 2023-07-04 03:57:21,049 (trainer:732) INFO: 10epoch:train:201-300batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=83.086, loss_att=62.207, acc=0.668, loss=68.471, backward_time=1.041, grad_norm=91.324, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.248e-04, train_time=2.976
+[gpub001:0/64] 2023-07-04 04:00:00,041 (trainer:732) INFO: 10epoch:train:301-400batch: iter_time=0.002, forward_time=0.201, loss_ctc=95.823, loss_att=89.716, acc=0.632, loss=91.548, backward_time=1.115, grad_norm=100.695, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.189, optim0_lr0=1.247e-04, train_time=3.179
+[gpub001:0/64] 2023-07-04 04:02:29,261 (trainer:732) INFO: 10epoch:train:401-500batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=85.114, loss_att=65.608, acc=0.636, loss=71.460, backward_time=1.044, grad_norm=99.338, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.246e-04, train_time=2.984
+[gpub001:0/64] 2023-07-04 04:05:16,066 (trainer:732) INFO: 10epoch:train:501-600batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=82.793, loss_att=59.421, acc=0.686, loss=66.432, backward_time=1.067, grad_norm=93.603, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.246e-04, train_time=3.336
+[gpub001:0/64] 2023-07-04 04:08:01,476 (trainer:732) INFO: 10epoch:train:601-700batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=80.618, loss_att=66.338, acc=0.662, loss=70.622, backward_time=1.058, grad_norm=93.226, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.245e-04, train_time=3.308
+[gpub001:0/64] 2023-07-04 04:10:30,382 (trainer:732) INFO: 10epoch:train:701-800batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.601, loss_att=54.680, acc=0.656, loss=58.856, backward_time=1.044, grad_norm=79.552, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.244e-04, train_time=2.978
+[gpub001:0/64] 2023-07-04 04:12:53,237 (trainer:732) INFO: 10epoch:train:801-900batch: iter_time=1.028e-04, forward_time=0.145, loss_ctc=85.022, loss_att=59.685, acc=0.652, loss=67.286, backward_time=1.036, grad_norm=92.734, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.243e-04, train_time=2.857
+[gpub001:0/64] 2023-07-04 04:15:35,870 (trainer:732) INFO: 10epoch:train:901-1000batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=78.810, loss_att=62.834, acc=0.664, loss=67.627, backward_time=1.081, grad_norm=81.075, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.243e-04, train_time=3.252
+[gpub001:0/64] 2023-07-04 04:15:49,814 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-04 04:16:11,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 04:16:16,315 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 04:16:16,315 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6,
+[gpub001:0/64] 2023-07-04 04:16:16,322 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 04:22:44,047 (trainer:732) INFO: 10epoch:train:1001-1100batch: iter_time=2.290, forward_time=0.192, loss_ctc=71.839, loss_att=58.508, acc=0.676, loss=62.507, backward_time=1.050, grad_norm=82.985, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.185, optim0_lr0=1.242e-04, train_time=8.563
+[gpub001:0/64] 2023-07-04 04:25:13,405 (trainer:732) INFO: 10epoch:train:1101-1200batch: iter_time=1.072e-04, forward_time=0.144, loss_ctc=77.842, loss_att=57.499, acc=0.651, loss=63.602, backward_time=1.049, grad_norm=92.682, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.241e-04, train_time=2.987
+[gpub001:0/64] 2023-07-04 04:28:16,167 (trainer:732) INFO: 10epoch:train:1201-1300batch: iter_time=1.018e-04, forward_time=0.146, loss_ctc=79.271, loss_att=59.359, acc=0.677, loss=65.332, backward_time=1.078, grad_norm=81.278, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.240e-04, train_time=3.655
+[gpub001:0/64] 2023-07-04 04:30:50,171 (trainer:732) INFO: 10epoch:train:1301-1400batch: iter_time=1.297e-04, forward_time=0.145, loss_ctc=93.983, loss_att=86.915, acc=0.636, loss=89.035, backward_time=1.054, grad_norm=96.846, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.240e-04, train_time=3.080
+[gpub001:0/64] 2023-07-04 04:33:24,726 (trainer:732) INFO: 10epoch:train:1401-1500batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=82.254, loss_att=62.424, acc=0.642, loss=68.373, backward_time=1.061, grad_norm=95.559, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.239e-04, train_time=3.091
+[gpub001:0/64] 2023-07-04 04:36:06,544 (trainer:732) INFO: 10epoch:train:1501-1600batch: iter_time=1.104e-04, forward_time=0.143, loss_ctc=83.817, loss_att=59.295, acc=0.686, loss=66.652, backward_time=1.055, grad_norm=85.857, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.238e-04, train_time=3.236
+[gpub001:0/64] 2023-07-04 04:38:46,522 (trainer:732) INFO: 10epoch:train:1601-1700batch: iter_time=9.965e-05, forward_time=0.145, loss_ctc=80.193, loss_att=65.391, acc=0.664, loss=69.831, backward_time=1.061, grad_norm=85.978, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.237e-04, train_time=3.199
+[gpub001:0/64] 2023-07-04 04:41:26,350 (trainer:732) INFO: 10epoch:train:1701-1800batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=70.011, loss_att=53.793, acc=0.662, loss=58.658, backward_time=1.055, grad_norm=85.885, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.237e-04, train_time=3.196
+[gpub001:0/64] 2023-07-04 04:44:09,017 (trainer:732) INFO: 10epoch:train:1801-1900batch: iter_time=2.201e-04, forward_time=0.181, loss_ctc=83.456, loss_att=59.210, acc=0.658, loss=66.484, backward_time=1.066, grad_norm=92.746, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.187, optim0_lr0=1.236e-04, train_time=3.253
+[gpub001:0/64] 2023-07-04 04:46:37,714 (trainer:732) INFO: 10epoch:train:1901-2000batch: iter_time=1.199e-04, forward_time=0.183, loss_ctc=77.646, loss_att=63.155, acc=0.668, loss=67.502, backward_time=1.048, grad_norm=81.185, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.186, optim0_lr0=1.235e-04, train_time=2.974
+[gpub001:0/64] 2023-07-04 04:46:55,557 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-04 04:47:17,723 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 04:47:22,197 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 04:47:22,197 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7,
+[gpub001:0/64] 2023-07-04 04:47:22,204 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 04:52:58,643 (trainer:732) INFO: 10epoch:train:2001-2100batch: iter_time=2.218, forward_time=0.151, loss_ctc=71.055, loss_att=59.286, acc=0.684, loss=62.817, backward_time=1.049, grad_norm=84.270, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.234e-04, train_time=7.619
+[gpub001:0/64] 2023-07-04 04:55:14,620 (trainer:732) INFO: 10epoch:train:2101-2200batch: iter_time=1.359e-04, forward_time=0.145, loss_ctc=77.590, loss_att=57.580, acc=0.661, loss=63.583, backward_time=1.027, grad_norm=85.515, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.234e-04, train_time=2.719
+[gpub001:0/64] 2023-07-04 04:57:30,708 (trainer:732) INFO: 10epoch:train:2201-2300batch: iter_time=1.257e-04, forward_time=0.147, loss_ctc=80.305, loss_att=59.243, acc=0.682, loss=65.561, backward_time=1.029, grad_norm=96.055, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.233e-04, train_time=2.722
+[gpub001:0/64] 2023-07-04 05:00:02,444 (trainer:732) INFO: 10epoch:train:2301-2400batch: iter_time=1.220e-04, forward_time=0.148, loss_ctc=91.760, loss_att=85.779, acc=0.656, loss=87.573, backward_time=1.057, grad_norm=104.082, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.232e-04, train_time=3.034
+[gpub001:0/64] 2023-07-04 05:02:21,395 (trainer:732) INFO: 10epoch:train:2401-2500batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=82.408, loss_att=62.987, acc=0.655, loss=68.813, backward_time=1.033, grad_norm=110.637, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.231e-04, train_time=2.779
+[gpub001:0/64] 2023-07-04 05:04:43,190 (trainer:732) INFO: 10epoch:train:2501-2600batch: iter_time=1.250e-04, forward_time=0.146, loss_ctc=81.831, loss_att=58.991, acc=0.696, loss=65.843, backward_time=1.038, grad_norm=132.918, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.231e-04, train_time=2.836
+[gpub001:0/64] 2023-07-04 05:07:37,477 (trainer:732) INFO: 10epoch:train:2601-2700batch: iter_time=1.360e-04, forward_time=0.146, loss_ctc=79.920, loss_att=63.924, acc=0.673, loss=68.723, backward_time=1.119, grad_norm=92.002, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.230e-04, train_time=3.486
+[gpub001:0/64] 2023-07-04 05:10:26,668 (trainer:732) INFO: 10epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=68.184, loss_att=53.646, acc=0.669, loss=58.007, backward_time=1.089, grad_norm=101.305, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.229e-04, train_time=3.384
+[gpub001:0/64] 2023-07-04 05:13:12,189 (trainer:732) INFO: 10epoch:train:2801-2900batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=82.436, loss_att=57.796, acc=0.672, loss=65.188, backward_time=1.060, grad_norm=88.713, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.228e-04, train_time=3.310
+[gpub001:0/64] 2023-07-04 05:15:42,484 (trainer:732) INFO: 10epoch:train:2901-3000batch: iter_time=1.035e-04, forward_time=0.146, loss_ctc=77.301, loss_att=60.873, acc=0.678, loss=65.801, backward_time=1.049, grad_norm=93.069, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.228e-04, train_time=3.006
+[gpub001:0/64] 2023-07-04 05:15:44,046 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-04 05:16:06,754 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 05:16:11,272 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 05:16:11,272 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8,
+[gpub001:0/64] 2023-07-04 05:16:11,280 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 05:23:59,629 (trainer:732) INFO: 10epoch:train:3001-3100batch: iter_time=1.599, forward_time=0.223, loss_ctc=72.196, loss_att=59.196, acc=0.672, loss=63.096, backward_time=1.053, grad_norm=81.648, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.187, optim0_lr0=1.227e-04, train_time=9.943
+[gpub001:0/64] 2023-07-04 05:26:40,677 (trainer:732) INFO: 10epoch:train:3101-3200batch: iter_time=1.388e-04, forward_time=0.145, loss_ctc=75.881, loss_att=56.536, acc=0.656, loss=62.340, backward_time=1.067, grad_norm=82.455, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.226e-04, train_time=3.221
+[gpub001:0/64] 2023-07-04 05:29:15,619 (trainer:732) INFO: 10epoch:train:3201-3300batch: iter_time=1.326e-04, forward_time=0.147, loss_ctc=79.697, loss_att=58.130, acc=0.679, loss=64.600, backward_time=1.053, grad_norm=94.457, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.225e-04, train_time=3.099
+[gpub001:0/64] 2023-07-04 05:31:55,579 (trainer:732) INFO: 10epoch:train:3301-3400batch: iter_time=1.271e-04, forward_time=0.146, loss_ctc=91.505, loss_att=85.159, acc=0.645, loss=87.063, backward_time=1.071, grad_norm=111.923, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.225e-04, train_time=3.199
+[gpub001:0/64] 2023-07-04 05:35:09,355 (trainer:732) INFO: 10epoch:train:3401-3500batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=80.885, loss_att=61.538, acc=0.648, loss=67.342, backward_time=1.126, grad_norm=93.020, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.224e-04, train_time=3.875
+[gpub001:0/64] 2023-07-04 05:38:07,869 (trainer:732) INFO: 10epoch:train:3501-3600batch: iter_time=1.136e-04, forward_time=0.147, loss_ctc=79.992, loss_att=57.211, acc=0.695, loss=64.045, backward_time=1.084, grad_norm=91.127, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.223e-04, train_time=3.570
+[gpub001:0/64] 2023-07-04 05:41:06,528 (trainer:732) INFO: 10epoch:train:3601-3700batch: iter_time=1.135e-04, forward_time=0.145, loss_ctc=78.588, loss_att=63.951, acc=0.669, loss=68.342, backward_time=1.080, grad_norm=95.189, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.222e-04, train_time=3.573
+[gpub001:0/64] 2023-07-04 05:43:41,455 (trainer:732) INFO: 10epoch:train:3701-3800batch: iter_time=1.262e-04, forward_time=0.146, loss_ctc=67.525, loss_att=52.975, acc=0.664, loss=57.340, backward_time=1.047, grad_norm=79.137, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.222e-04, train_time=3.098
+[gpub001:0/64] 2023-07-04 05:46:38,398 (trainer:732) INFO: 10epoch:train:3801-3900batch: iter_time=1.189e-04, forward_time=0.146, loss_ctc=83.184, loss_att=57.702, acc=0.661, loss=65.347, backward_time=1.081, grad_norm=85.750, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.221e-04, train_time=3.539
+[gpub001:0/64] 2023-07-04 05:49:41,596 (trainer:732) INFO: 10epoch:train:3901-4000batch: iter_time=7.671e-04, forward_time=0.232, loss_ctc=78.322, loss_att=62.743, acc=0.672, loss=67.416, backward_time=1.090, grad_norm=78.715, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.189, optim0_lr0=1.220e-04, train_time=3.664
+[gpub001:0/64] 2023-07-04 05:49:54,817 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-04 05:50:17,125 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 05:50:21,327 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 05:50:21,327 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9,
+[gpub001:0/64] 2023-07-04 05:50:21,337 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 05:57:30,927 (trainer:732) INFO: 10epoch:train:4001-4100batch: iter_time=2.608, forward_time=0.189, loss_ctc=70.837, loss_att=58.363, acc=0.687, loss=62.105, backward_time=1.088, grad_norm=80.611, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.186, optim0_lr0=1.219e-04, train_time=9.386
+[gpub001:0/64] 2023-07-04 05:59:46,877 (trainer:732) INFO: 10epoch:train:4101-4200batch: iter_time=1.047e-04, forward_time=0.146, loss_ctc=76.833, loss_att=56.960, acc=0.666, loss=62.922, backward_time=1.029, grad_norm=98.324, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.219e-04, train_time=2.719
+[gpub001:0/64] 2023-07-04 06:02:08,031 (trainer:732) INFO: 10epoch:train:4201-4300batch: iter_time=1.282e-04, forward_time=0.149, loss_ctc=77.560, loss_att=57.872, acc=0.688, loss=63.779, backward_time=1.038, grad_norm=85.425, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.218e-04, train_time=2.823
+[gpub001:0/64] 2023-07-04 06:04:26,653 (trainer:732) INFO: 10epoch:train:4301-4400batch: iter_time=1.141e-04, forward_time=0.147, loss_ctc=92.369, loss_att=84.411, acc=0.657, loss=86.799, backward_time=1.034, grad_norm=104.688, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.217e-04, train_time=2.772
+[gpub001:0/64] 2023-07-04 06:06:42,171 (trainer:732) INFO: 10epoch:train:4401-4500batch: iter_time=1.068e-04, forward_time=0.146, loss_ctc=80.486, loss_att=61.536, acc=0.660, loss=67.221, backward_time=1.025, grad_norm=95.744, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.217e-04, train_time=2.710
+[gpub001:0/64] 2023-07-04 06:08:58,348 (trainer:732) INFO: 10epoch:train:4501-4600batch: iter_time=1.054e-04, forward_time=0.147, loss_ctc=80.334, loss_att=58.029, acc=0.703, loss=64.720, backward_time=1.031, grad_norm=100.954, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.216e-04, train_time=2.723
+[gpub001:0/64] 2023-07-04 06:11:28,165 (trainer:732) INFO: 10epoch:train:4601-4700batch: iter_time=1.078e-04, forward_time=0.146, loss_ctc=77.401, loss_att=63.987, acc=0.673, loss=68.011, backward_time=1.064, grad_norm=87.364, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.215e-04, train_time=2.996
+[gpub001:0/64] 2023-07-04 06:14:13,116 (trainer:732) INFO: 10epoch:train:4701-4800batch: iter_time=1.077e-04, forward_time=0.147, loss_ctc=66.979, loss_att=52.793, acc=0.677, loss=57.049, backward_time=1.061, grad_norm=71.593, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.214e-04, train_time=3.299
+[gpub001:0/64] 2023-07-04 06:16:43,533 (trainer:732) INFO: 10epoch:train:4801-4900batch: iter_time=1.076e-04, forward_time=0.162, loss_ctc=83.390, loss_att=59.533, acc=0.672, loss=66.690, backward_time=1.046, grad_norm=101.548, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.214e-04, train_time=3.008
+[gpub001:0/64] 2023-07-04 06:19:33,266 (trainer:732) INFO: 10epoch:train:4901-5000batch: iter_time=5.750e-04, forward_time=0.213, loss_ctc=76.339, loss_att=59.292, acc=0.682, loss=64.406, backward_time=1.106, grad_norm=94.146, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.187, optim0_lr0=1.213e-04, train_time=3.394
+[gpub001:0/64] 2023-07-04 06:19:53,288 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-04 06:20:15,638 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 06:20:19,922 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 06:20:19,922 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2,
+[gpub001:0/64] 2023-07-04 06:20:19,929 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 06:27:20,326 (trainer:732) INFO: 10epoch:train:5001-5100batch: iter_time=2.350, forward_time=0.170, loss_ctc=71.177, loss_att=57.991, acc=0.682, loss=61.947, backward_time=1.051, grad_norm=82.378, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.185, optim0_lr0=1.212e-04, train_time=9.341
+[gpub001:0/64] 2023-07-04 06:29:40,314 (trainer:732) INFO: 10epoch:train:5101-5200batch: iter_time=1.100e-04, forward_time=0.146, loss_ctc=75.335, loss_att=55.900, acc=0.661, loss=61.731, backward_time=1.032, grad_norm=90.481, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.212e-04, train_time=2.800
+[gpub001:0/64] 2023-07-04 06:31:57,088 (trainer:732) INFO: 10epoch:train:5201-5300batch: iter_time=1.165e-04, forward_time=0.146, loss_ctc=79.123, loss_att=58.087, acc=0.682, loss=64.398, backward_time=1.031, grad_norm=88.409, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.211e-04, train_time=2.735
+[gpub001:0/64] 2023-07-04 06:34:23,229 (trainer:732) INFO: 10epoch:train:5301-5400batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=90.993, loss_att=83.458, acc=0.647, loss=85.719, backward_time=1.042, grad_norm=94.734, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.210e-04, train_time=2.923
+[gpub001:0/64] 2023-07-04 06:36:38,714 (trainer:732) INFO: 10epoch:train:5401-5500batch: iter_time=1.152e-04, forward_time=0.145, loss_ctc=80.319, loss_att=60.819, acc=0.653, loss=66.669, backward_time=1.025, grad_norm=93.349, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.209e-04, train_time=2.709
+[gpub001:0/64] 2023-07-04 06:38:58,311 (trainer:732) INFO: 10epoch:train:5501-5600batch: iter_time=1.103e-04, forward_time=0.145, loss_ctc=79.823, loss_att=56.920, acc=0.697, loss=63.791, backward_time=1.031, grad_norm=86.237, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.209e-04, train_time=2.792
+[gpub001:0/64] 2023-07-04 06:41:19,825 (trainer:732) INFO: 10epoch:train:5601-5700batch: iter_time=1.113e-04, forward_time=0.144, loss_ctc=79.289, loss_att=64.586, acc=0.666, loss=68.997, backward_time=1.035, grad_norm=117.701, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.208e-04, train_time=2.830
+[gpub001:0/64] 2023-07-04 06:43:36,922 (trainer:732) INFO: 10epoch:train:5701-5800batch: iter_time=1.127e-04, forward_time=0.146, loss_ctc=65.745, loss_att=51.767, acc=0.674, loss=55.961, backward_time=1.030, grad_norm=84.338, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.207e-04, train_time=2.742
+[gpub001:0/64] 2023-07-04 06:46:10,640 (trainer:732) INFO: 10epoch:train:5801-5900batch: iter_time=5.857e-04, forward_time=0.153, loss_ctc=83.685, loss_att=57.586, acc=0.660, loss=65.415, backward_time=1.047, grad_norm=90.102, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.207e-04, train_time=3.074
+[gpub001:0/64] 2023-07-04 06:48:48,486 (trainer:732) INFO: 10epoch:train:5901-6000batch: iter_time=1.104e-04, forward_time=0.190, loss_ctc=76.571, loss_att=62.067, acc=0.674, loss=66.418, backward_time=1.059, grad_norm=83.226, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.206e-04, train_time=3.157
+[gpub001:0/64] 2023-07-04 06:48:54,345 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-04 06:49:17,118 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 06:49:21,432 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 06:49:21,432 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4,
+[gpub001:0/64] 2023-07-04 06:49:21,440 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 06:54:57,248 (trainer:732) INFO: 10epoch:train:6001-6100batch: iter_time=1.887, forward_time=0.176, loss_ctc=72.668, loss_att=58.875, acc=0.681, loss=63.013, backward_time=1.047, grad_norm=78.204, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.185, optim0_lr0=1.205e-04, train_time=7.374
+[gpub001:0/64] 2023-07-04 06:57:13,494 (trainer:732) INFO: 10epoch:train:6101-6200batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=75.100, loss_att=56.125, acc=0.660, loss=61.817, backward_time=1.028, grad_norm=96.151, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.205e-04, train_time=2.726
+[gpub001:0/64] 2023-07-04 06:59:29,483 (trainer:732) INFO: 10epoch:train:6201-6300batch: iter_time=1.260e-04, forward_time=0.149, loss_ctc=78.803, loss_att=58.253, acc=0.683, loss=64.418, backward_time=1.028, grad_norm=92.452, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.204e-04, train_time=2.720
+[gpub001:0/64] 2023-07-04 07:01:57,947 (trainer:732) INFO: 10epoch:train:6301-6400batch: iter_time=1.262e-04, forward_time=0.166, loss_ctc=92.014, loss_att=84.009, acc=0.648, loss=86.411, backward_time=1.045, grad_norm=99.864, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.185, optim0_lr0=1.203e-04, train_time=2.969
+[gpub001:0/64] 2023-07-04 07:04:34,258 (trainer:732) INFO: 10epoch:train:6401-6500batch: iter_time=1.256e-04, forward_time=0.156, loss_ctc=78.738, loss_att=59.782, acc=0.657, loss=65.469, backward_time=1.119, grad_norm=101.854, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.185, optim0_lr0=1.202e-04, train_time=3.126
+[gpub001:0/64] 2023-07-04 07:06:55,517 (trainer:732) INFO: 10epoch:train:6501-6600batch: iter_time=1.241e-04, forward_time=0.168, loss_ctc=79.093, loss_att=56.679, acc=0.698, loss=63.403, backward_time=1.032, grad_norm=82.095, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.202e-04, train_time=2.825
+[gpub001:0/64] 2023-07-04 07:09:19,221 (trainer:732) INFO: 10epoch:train:6601-6700batch: iter_time=1.178e-04, forward_time=0.177, loss_ctc=77.971, loss_att=64.064, acc=0.671, loss=68.236, backward_time=1.057, grad_norm=85.100, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.201e-04, train_time=2.874
+[gpub001:0/64] 2023-07-04 07:11:39,694 (trainer:732) INFO: 10epoch:train:6701-6800batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=66.743, loss_att=52.391, acc=0.671, loss=56.697, backward_time=1.034, grad_norm=88.159, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.200e-04, train_time=2.809
+[gpub001:0/64] 2023-07-04 07:14:01,640 (trainer:732) INFO: 10epoch:train:6801-6900batch: iter_time=1.103e-04, forward_time=0.147, loss_ctc=82.598, loss_att=57.601, acc=0.667, loss=65.100, backward_time=1.035, grad_norm=88.853, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.200e-04, train_time=2.839
+[gpub001:0/64] 2023-07-04 07:16:24,881 (trainer:732) INFO: 10epoch:train:6901-7000batch: iter_time=1.054e-04, forward_time=0.147, loss_ctc=74.921, loss_att=60.937, acc=0.676, loss=65.132, backward_time=1.031, grad_norm=86.997, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.199e-04, train_time=2.865
+[gpub001:0/64] 2023-07-04 07:16:37,979 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-04 07:17:00,253 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 07:17:04,527 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 07:17:04,527 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5,
+[gpub001:0/64] 2023-07-04 07:17:04,630 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 07:25:43,021 (trainer:732) INFO: 10epoch:train:7001-7100batch: iter_time=2.361, forward_time=0.227, loss_ctc=70.630, loss_att=58.023, acc=0.692, loss=61.805, backward_time=1.046, grad_norm=83.473, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.188, optim0_lr0=1.198e-04, train_time=11.161
+[gpub001:0/64] 2023-07-04 07:27:58,795 (trainer:732) INFO: 10epoch:train:7101-7200batch: iter_time=1.384e-04, forward_time=0.145, loss_ctc=74.050, loss_att=55.642, acc=0.672, loss=61.165, backward_time=1.026, grad_norm=90.165, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.198e-04, train_time=2.717
+[gpub001:0/64] 2023-07-04 07:30:21,144 (trainer:732) INFO: 10epoch:train:7201-7300batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=77.786, loss_att=57.538, acc=0.689, loss=63.612, backward_time=1.035, grad_norm=81.644, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.197e-04, train_time=2.847
+[gpub001:0/64] 2023-07-04 07:32:41,864 (trainer:732) INFO: 10epoch:train:7301-7400batch: iter_time=9.930e-05, forward_time=0.147, loss_ctc=90.070, loss_att=83.807, acc=0.661, loss=85.686, backward_time=1.045, grad_norm=98.326, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.196e-04, train_time=2.814
+[gpub001:0/64] 2023-07-04 07:35:08,429 (trainer:732) INFO: 10epoch:train:7401-7500batch: iter_time=1.019e-04, forward_time=0.145, loss_ctc=79.856, loss_att=61.766, acc=0.661, loss=67.193, backward_time=1.040, grad_norm=89.441, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.196e-04, train_time=2.931
+[gpub001:0/64] 2023-07-04 07:37:26,655 (trainer:732) INFO: 10epoch:train:7501-7600batch: iter_time=1.190e-04, forward_time=0.145, loss_ctc=81.266, loss_att=58.514, acc=0.702, loss=65.340, backward_time=1.030, grad_norm=92.735, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.195e-04, train_time=2.764
+[gpub001:0/64] 2023-07-04 07:40:02,165 (trainer:732) INFO: 10epoch:train:7601-7700batch: iter_time=5.351e-04, forward_time=0.147, loss_ctc=76.333, loss_att=62.310, acc=0.677, loss=66.517, backward_time=1.056, grad_norm=79.911, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.194e-04, train_time=3.110
+[gpub001:0/64] 2023-07-04 07:42:23,167 (trainer:732) INFO: 10epoch:train:7701-7800batch: iter_time=9.641e-05, forward_time=0.145, loss_ctc=66.356, loss_att=52.050, acc=0.681, loss=56.342, backward_time=1.039, grad_norm=73.483, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.194e-04, train_time=2.820
+[gpub001:0/64] 2023-07-04 07:44:57,556 (trainer:732) INFO: 10epoch:train:7801-7900batch: iter_time=9.989e-05, forward_time=0.155, loss_ctc=81.235, loss_att=57.248, acc=0.677, loss=64.444, backward_time=1.065, grad_norm=86.881, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.193e-04, train_time=3.088
+[gpub001:0/64] 2023-07-04 07:48:09,978 (trainer:732) INFO: 10epoch:train:7901-8000batch: iter_time=1.022e-04, forward_time=0.170, loss_ctc=76.555, loss_att=60.229, acc=0.685, loss=65.127, backward_time=1.097, grad_norm=79.075, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.192e-04, train_time=3.848
+[gpub001:0/64] 2023-07-04 07:48:25,594 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-04 07:48:48,049 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 07:48:52,289 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 07:48:52,289 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1,
+[gpub001:0/64] 2023-07-04 07:48:52,297 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 07:55:59,186 (trainer:732) INFO: 10epoch:train:8001-8100batch: iter_time=2.602, forward_time=0.191, loss_ctc=70.351, loss_att=57.246, acc=0.695, loss=61.178, backward_time=1.076, grad_norm=77.703, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.185, optim0_lr0=1.191e-04, train_time=9.384
+[gpub001:0/64] 2023-07-04 07:58:31,433 (trainer:732) INFO: 10epoch:train:8101-8200batch: iter_time=1.315e-04, forward_time=0.146, loss_ctc=74.708, loss_att=54.669, acc=0.672, loss=60.681, backward_time=1.055, grad_norm=103.432, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.191e-04, train_time=3.045
+[gpub001:0/64] 2023-07-04 08:01:03,790 (trainer:732) INFO: 10epoch:train:8201-8300batch: iter_time=1.264e-04, forward_time=0.147, loss_ctc=77.423, loss_att=57.831, acc=0.689, loss=63.709, backward_time=1.050, grad_norm=101.802, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.190e-04, train_time=3.047
+[gpub001:0/64] 2023-07-04 08:03:43,672 (trainer:732) INFO: 10epoch:train:8301-8400batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=90.631, loss_att=83.753, acc=0.660, loss=85.816, backward_time=1.067, grad_norm=95.528, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.189e-04, train_time=3.197
+[gpub001:0/64] 2023-07-04 08:06:18,852 (trainer:732) INFO: 10epoch:train:8401-8500batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=78.090, loss_att=59.472, acc=0.668, loss=65.057, backward_time=1.065, grad_norm=97.820, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.189e-04, train_time=3.103
+[gpub001:0/64] 2023-07-04 08:08:44,080 (trainer:732) INFO: 10epoch:train:8501-8600batch: iter_time=1.136e-04, forward_time=0.147, loss_ctc=81.800, loss_att=58.282, acc=0.704, loss=65.338, backward_time=1.043, grad_norm=86.480, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.188e-04, train_time=2.904
+[gpub001:0/64] 2023-07-04 08:11:19,247 (trainer:732) INFO: 10epoch:train:8601-8700batch: iter_time=1.271e-04, forward_time=0.146, loss_ctc=77.659, loss_att=64.031, acc=0.675, loss=68.119, backward_time=1.051, grad_norm=85.748, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.187e-04, train_time=3.103
+[gpub001:0/64] 2023-07-04 08:14:02,057 (trainer:732) INFO: 10epoch:train:8701-8800batch: iter_time=1.326e-04, forward_time=0.146, loss_ctc=66.799, loss_att=52.036, acc=0.681, loss=56.465, backward_time=1.059, grad_norm=73.862, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.187e-04, train_time=3.256
+[gpub001:0/64] 2023-07-04 08:16:37,378 (trainer:732) INFO: 10epoch:train:8801-8900batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=82.147, loss_att=57.347, acc=0.680, loss=64.787, backward_time=1.052, grad_norm=89.496, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.186e-04, train_time=3.106
+[gpub001:0/64] 2023-07-04 08:19:08,726 (trainer:732) INFO: 10epoch:train:8901-9000batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=75.952, loss_att=59.955, acc=0.682, loss=64.755, backward_time=1.047, grad_norm=84.576, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.185e-04, train_time=3.027
+[gpub001:0/64] 2023-07-04 08:19:28,754 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-04 08:19:51,073 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 08:19:55,348 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 08:19:55,348 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3,
+[gpub001:0/64] 2023-07-04 08:19:55,408 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 08:26:15,955 (trainer:732) INFO: 10epoch:train:9001-9100batch: iter_time=2.440, forward_time=0.246, loss_ctc=70.151, loss_att=57.464, acc=0.695, loss=61.270, backward_time=1.053, grad_norm=83.805, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.187, optim0_lr0=1.185e-04, train_time=8.544
+[gpub001:0/64] 2023-07-04 08:28:32,451 (trainer:732) INFO: 10epoch:train:9101-9200batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=75.009, loss_att=55.262, acc=0.672, loss=61.186, backward_time=1.028, grad_norm=94.917, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.184e-04, train_time=2.730
+[gpub001:0/64] 2023-07-04 08:30:51,931 (trainer:732) INFO: 10epoch:train:9201-9300batch: iter_time=1.173e-04, forward_time=0.148, loss_ctc=77.181, loss_att=56.772, acc=0.694, loss=62.895, backward_time=1.031, grad_norm=83.826, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.184, optim0_lr0=1.183e-04, train_time=2.789
+[gpub001:0/64] 2023-07-04 08:33:11,956 (trainer:732) INFO: 10epoch:train:9301-9400batch: iter_time=1.225e-04, forward_time=0.149, loss_ctc=87.476, loss_att=81.540, acc=0.668, loss=83.321, backward_time=1.037, grad_norm=99.964, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.183e-04, train_time=2.800
+[gpub001:0/64] 2023-07-04 08:35:38,898 (trainer:732) INFO: 10epoch:train:9401-9500batch: iter_time=1.206e-04, forward_time=0.147, loss_ctc=78.958, loss_att=59.832, acc=0.663, loss=65.570, backward_time=1.040, grad_norm=101.896, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.182e-04, train_time=2.939
+[gpub001:0/64] 2023-07-04 08:38:05,571 (trainer:732) INFO: 10epoch:train:9501-9600batch: iter_time=1.192e-04, forward_time=0.147, loss_ctc=79.300, loss_att=57.356, acc=0.708, loss=63.939, backward_time=1.043, grad_norm=85.796, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.181e-04, train_time=2.933
+[gpub001:0/64] 2023-07-04 08:40:51,798 (trainer:732) INFO: 10epoch:train:9601-9700batch: iter_time=1.288e-04, forward_time=0.148, loss_ctc=78.120, loss_att=64.613, acc=0.677, loss=68.665, backward_time=1.077, grad_norm=116.603, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.181e-04, train_time=3.324
+[gpub001:0/64] 2023-07-04 08:43:38,644 (trainer:732) INFO: 10epoch:train:9701-9800batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=66.807, loss_att=51.983, acc=0.683, loss=56.430, backward_time=1.086, grad_norm=86.590, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.180e-04, train_time=3.337
+[gpub001:0/64] 2023-07-04 08:46:13,955 (trainer:732) INFO: 10epoch:train:9801-9900batch: iter_time=1.012e-04, forward_time=0.147, loss_ctc=81.321, loss_att=58.560, acc=0.675, loss=65.388, backward_time=1.046, grad_norm=87.678, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.179e-04, train_time=3.106
+[gpub001:0/64] 2023-07-04 08:48:56,080 (trainer:732) INFO: 10epoch:train:9901-10000batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=75.337, loss_att=58.584, acc=0.688, loss=63.610, backward_time=1.058, grad_norm=81.275, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.179e-04, train_time=3.242
+[gpub001:0/64] 2023-07-04 09:01:39,700 (trainer:338) INFO: 10epoch results: [train] iter_time=0.259, forward_time=0.156, loss_ctc=78.576, loss_att=61.451, acc=0.672, loss=66.588, backward_time=1.054, grad_norm=91.032, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.213e-04, train_time=3.698, time=5 hours, 8 minutes and 33.64 seconds, total_count=70000, gpu_max_cached_mem_GB=37.459, [valid] loss_ctc=58.858, cer_ctc=0.319, loss_att=49.021, acc=0.607, cer=0.458, wer=1.000, loss=51.972, time=6 minutes and 30.14 seconds, total_count=7590, gpu_max_cached_mem_GB=37.459, [att_plot] time=5 minutes and 50.7 seconds, total_count=0, gpu_max_cached_mem_GB=37.459
+[gpub001:0/64] 2023-07-04 09:01:59,109 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-04 09:02:00,803 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till10epoch.pth
+[gpub001:0/64] 2023-07-04 09:02:44,194 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till10epoch.pth
+[gpub001:0/64] 2023-07-04 09:02:51,878 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/5epoch.pth
+[gpub001:0/64] 2023-07-04 09:02:51,945 (trainer:272) INFO: 11/100epoch started. Estimated time to finish: 2 weeks, 5 days and 19 hours
+[gpub001:0/64] 2023-07-04 09:02:53,579 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-04 09:03:16,753 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 09:03:21,037 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-04 09:03:21,038 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4,
+[gpub001:0/64] 2023-07-04 09:03:21,282 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 09:11:53,137 (trainer:732) INFO: 11epoch:train:1-100batch: iter_time=3.940, forward_time=0.203, loss_ctc=69.654, loss_att=54.166, acc=0.663, loss=58.813, backward_time=1.045, grad_norm=79.216, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.187, optim0_lr0=1.178e-04, train_time=10.802
+[gpub001:0/64] 2023-07-04 09:14:10,915 (trainer:732) INFO: 11epoch:train:101-200batch: iter_time=1.300e-04, forward_time=0.146, loss_ctc=88.708, loss_att=62.409, acc=0.670, loss=70.299, backward_time=1.030, grad_norm=109.268, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.184, optim0_lr0=1.178e-04, train_time=2.755
+[gpub001:0/64] 2023-07-04 09:16:26,411 (trainer:732) INFO: 11epoch:train:201-300batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=75.382, loss_att=62.579, acc=0.655, loss=66.420, backward_time=1.027, grad_norm=83.304, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.177e-04, train_time=2.710
+[gpub001:0/64] 2023-07-04 09:18:42,130 (trainer:732) INFO: 11epoch:train:301-400batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=78.822, loss_att=59.346, acc=0.668, loss=65.189, backward_time=1.026, grad_norm=100.773, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.176e-04, train_time=2.714
+[gpub001:0/64] 2023-07-04 09:21:02,338 (trainer:732) INFO: 11epoch:train:401-500batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=76.393, loss_att=64.269, acc=0.659, loss=67.906, backward_time=1.036, grad_norm=116.385, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.184, optim0_lr0=1.176e-04, train_time=2.804
+[gpub001:0/64] 2023-07-04 09:23:33,454 (trainer:732) INFO: 11epoch:train:501-600batch: iter_time=1.256e-04, forward_time=0.144, loss_ctc=76.583, loss_att=58.803, acc=0.655, loss=64.137, backward_time=1.050, grad_norm=86.037, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.175e-04, train_time=3.022
+[gpub001:0/64] 2023-07-04 09:26:08,811 (trainer:732) INFO: 11epoch:train:601-700batch: iter_time=1.229e-04, forward_time=0.146, loss_ctc=91.392, loss_att=65.716, acc=0.662, loss=73.419, backward_time=1.075, grad_norm=105.260, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.174e-04, train_time=3.107
+[gpub001:0/64] 2023-07-04 09:28:29,823 (trainer:732) INFO: 11epoch:train:701-800batch: iter_time=1.309e-04, forward_time=0.144, loss_ctc=80.913, loss_att=63.348, acc=0.638, loss=68.618, backward_time=1.034, grad_norm=106.285, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.174e-04, train_time=2.820
+[gpub001:0/64] 2023-07-04 09:31:05,097 (trainer:732) INFO: 11epoch:train:801-900batch: iter_time=3.980e-04, forward_time=0.269, loss_ctc=82.733, loss_att=69.706, acc=0.657, loss=73.614, backward_time=1.058, grad_norm=122.936, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.189, optim0_lr0=1.173e-04, train_time=3.105
+[gpub001:0/64] 2023-07-04 09:33:40,369 (trainer:732) INFO: 11epoch:train:901-1000batch: iter_time=1.140e-04, forward_time=0.148, loss_ctc=67.176, loss_att=55.082, acc=0.663, loss=58.710, backward_time=1.066, grad_norm=79.860, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.172e-04, train_time=3.105
+[gpub001:0/64] 2023-07-04 09:33:57,353 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-04 09:34:19,685 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 09:34:23,885 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-04 09:34:23,885 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2,
+[gpub001:0/64] 2023-07-04 09:34:23,892 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
+    exitcode = _main(fd, parent_sentinel)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
+    self = reduction.pickle.load(from_parent)
+_pickle.UnpicklingError: pickle data was truncated
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 140, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with signal SIGKILL
+Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
+    exitcode = _main(fd, parent_sentinel)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
+    self = reduction.pickle.load(from_parent)
+_pickle.UnpicklingError: pickle data was truncated
+Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
+    exitcode = _main(fd, parent_sentinel)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
+    self = reduction.pickle.load(from_parent)
+_pickle.UnpicklingError: pickle data was truncated
+Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
+    exitcode = _main(fd, parent_sentinel)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
+    self = reduction.pickle.load(from_parent)
+_pickle.UnpicklingError: pickle data was truncated
+slurmstepd: error: Detected 1 oom-kill event(s) in StepId=2121665.0. Some of your processes may have been killed by the cgroup out-of-memory handler.
+srun: error: gpub001: task 0: Out Of Memory
+gpub022:3399535:3399624 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub022:3399536:3399623 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub022:3399537:3399622 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub022:3399534:3399625 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub022:3399536:3399536 [2] NCCL INFO comm 0x93f2210 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 43] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 42] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 50] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 49] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 40] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17.
This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 41] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 48] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 51] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub076:3343846:3343926 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub076:3343843:3343928 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub076:3343845:3343927 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub066:1432047:1432134 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub066:1432046:1432136 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub066:1432048:1432137 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub022:3399537:3399537 [3] NCCL INFO comm 0x50214710 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub022:3399535:3399535 [1] NCCL INFO comm 0x4fa312f0 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub022:3399534:3399534 [0] NCCL INFO comm 0x50711f50 rank 16 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub066:1432048:1432069 [0] NCCL INFO comm 0x51126a70 rank 43 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub066:1432047:1432070 [0] NCCL INFO comm 0x9ed0150 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub076:3343845:3343867 [0] NCCL INFO comm 0x4fe2ad90 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub076:3343846:3343866 [0] NCCL INFO comm 0x50888c10 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub066:1432046:1432068 [0] NCCL INFO comm 0x4fabed20 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub076:3343844:3343868 [0] NCCL INFO comm 0xb838ee00 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub076:3343843:3343869 [0] NCCL INFO comm 0x508de3f0 rank 48 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub066:1432045:1432071 [0] NCCL INFO comm 0x50653520 rank 40 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File 
"/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 51] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 19] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800147 milliseconds before timing out. +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 48] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 50] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 49] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 18] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800146 milliseconds before timing out. +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 16] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800157 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 17] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800147 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 42] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 43] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 41] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 40] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 27] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 46] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 44] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 25] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 47] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 59] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. 
This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 24] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 58] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 57] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 26] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 45] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub031:1878314:1878398 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub031:1878313:1878397 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub079:2616804:2616888 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub079:2616805:2616890 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub067:1390514:1390593 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub067:1390516:1390596 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub067:1390513:1390595 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +[W ProcessGroupNCCL.cpp:948] [Rank 56] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub067:1390516:1390538 [0] NCCL INFO comm 0x509fc1c0 rank 47 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub079:2616806:2616829 [0] NCCL INFO comm 0x89762f0 rank 59 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub031:1878312:1878336 [0] NCCL INFO comm 0x509faf60 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub079:2616805:2616827 [0] NCCL INFO comm 0x8b2c9c20 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub067:1390514:1390537 [0] NCCL INFO comm 0xa70b75d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub031:1878313:1878334 [0] NCCL INFO comm 0xa54f400 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub067:1390513:1390536 [0] NCCL INFO comm 0x4ef73970 rank 44 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub079:2616804:2616830 [0] NCCL INFO comm 0x9014adc0 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub067:1390515:1390535 [0] NCCL INFO comm 0x5030f0d0 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub031:1878314:1878335 [0] NCCL INFO comm 0x511daaa0 rank 27 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub079:2616803:2616828 [0] NCCL INFO comm 0xa9779a50 rank 56 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub031:1878311:1878337 [0] NCCL INFO comm 0xba515710 rank 24 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 27] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 59] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 58] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 46] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 56] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 57] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 44] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 25] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 26] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 45] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 24] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 47] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 14] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 15] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 13] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 12] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub016:1380823:1380905 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub016:1380823:1380846 [0] NCCL INFO comm 0x517fee10 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 54] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 55] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 52] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 53] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub016:1380824:1380845 [0] NCCL INFO comm 0x8d241cc0 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub016:1380822:1380843 [0] NCCL INFO comm 0x9b8bb7a0 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub077:252894:252972 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub077:252895:252971 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub077:252894:252916 [0] NCCL INFO comm 0xc19a4b40 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub077:252893:252970 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub077:252893:252914 [0] NCCL INFO comm 0x509e6280 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub077:252895:252913 [0] NCCL INFO comm 0x9491900 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub077:252892:252969 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub077:252892:252915 [0] NCCL INFO comm 0x97aafd0 rank 52 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub016:1380821:1380844 [0] NCCL INFO comm 0x50896990 rank 12 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 14] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 54] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 53] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 13] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 55] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 15] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 52] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 21] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 23] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 22] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub030:2310660:2310736 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+[W ProcessGroupNCCL.cpp:948] [Rank 20] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub030:2310660:2310681 [0] NCCL INFO comm 0xa84d3a10 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 12] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub030:2310658:2310733 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub030:2310659:2310682 [0] NCCL INFO comm 0x8de12f60 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub030:2310658:2310680 [0] NCCL INFO comm 0x50672d50 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 38] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 39] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 37] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 36] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub060:1938146:1938228 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub060:1938145:1938226 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub060:1938144:1938225 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub060:1938146:1938170 [0] NCCL INFO comm 0x50addeb0 rank 39 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub060:1938145:1938171 [0] NCCL INFO comm 0xb591e2d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub060:1938144:1938172 [0] NCCL INFO comm 0x4f3bc650 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub030:2310657:2310679 [0] NCCL INFO comm 0x50d929d0 rank 20 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 62] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 63] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub096:1440104:1440184 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub096:1440103:1440185 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+[W ProcessGroupNCCL.cpp:948] [Rank 60] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 61] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub096:1440102:1440186 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub096:1440103:1440125 [0] NCCL INFO comm 0x91c6060 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub096:1440104:1440126 [0] NCCL INFO comm 0x9f265ce0 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub096:1440102:1440128 [0] NCCL INFO comm 0x50d96930 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub032:3246893:3246982 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub032:3246893:3246893 [1] NCCL INFO comm 0x9a6ad00 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 23] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 21] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 38] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 22] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub060:1938143:1938169 [0] NCCL INFO comm 0x50561020 rank 36 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 20] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub096:1440101:1440127 [0] NCCL INFO comm 0x50b020d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 39] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 37] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 35] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 33] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+[W ProcessGroupNCCL.cpp:948] [Rank 32] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 62] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 11] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub059:1894384:1894465 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub059:1894386:1894467 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+[W ProcessGroupNCCL.cpp:948] [Rank 9] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 8] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub059:1894386:1894405 [0] NCCL INFO comm 0x9cf1390 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 10] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 63] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 34] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub059:1894385:1894466 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 29] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800044 milliseconds before timing out.
+gpub059:1894385:1894403 [0] NCCL INFO comm 0x50af3510 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub015:828881:828961 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub015:828878:828959 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub015:828879:828960 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub015:828880:828958 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +[W ProcessGroupNCCL.cpp:948] [Rank 4] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 7] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 5] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub015:828879:828899 [0] NCCL INFO comm 0x8ad4b90 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub059:1894384:1894406 [0] NCCL INFO comm 0xb7b49460 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 61] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 6] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub015:828880:828901 [0] NCCL INFO comm 0x9e67ed0 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub015:828881:828898 [0] NCCL INFO comm 0xb64dad10 rank 11 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub002:1756560:1756644 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub002:1756561:1756645 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub002:1756562:1756643 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub015:828878:828900 [0] NCCL INFO comm 0x8fc63100 rank 8 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub002:1756560:1756584 [0] NCCL INFO comm 0x17829840 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub002:1756561:1756582 [0] NCCL INFO comm 0x51ad54d0 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub002:1756562:1756583 [0] NCCL INFO comm 0x9ca8ab90 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub002:1756559:1756646 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub002:1756559:1756581 [0] NCCL INFO comm 0x51930090 rank 4 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 36] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 60] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub059:1894383:1894404 [0] NCCL INFO comm 0x510467d0 rank 32 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 35] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +[W ProcessGroupNCCL.cpp:948] [Rank 30] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 9] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 31] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 28] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub032:3246895:3246981 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub032:3246895:3246917 [0] NCCL INFO comm 0x1b5e5670 rank 31 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub032:3246894:3246916 [0] NCCL INFO comm 0x9ddee7e0 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 5] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 10] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 7] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 33] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 6] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 8] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 11] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 4] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator was aborted on rank 34. Original reason for failure was: [Rank 34] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 32] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub032:3246892:3246918 [0] NCCL INFO comm 0x4ff3dba0 rank 28 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File 
"/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 31] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 30] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 28] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+srun: error: gpub067: task 11: Exited with exit code 1
+srun: error: gpub022: task 4: Exited with exit code 1
+srun: error: gpub031: task 6: Exited with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+srun: error: gpub096: task 15: Exited with exit code 1
+srun: error: gpub032: task 7: Exited with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+srun: error: gpub066: task 10: Exited with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+srun: error: gpub060: task 9: Exited with exit code 1
+srun: error: gpub076: task 12: Exited with exit code 1
+srun: error: gpub079: task 14: Exited with exit code 1
+srun: error: gpub077: task 13: Exited with exit code 1
+srun: error: gpub059: task 8: Exited with exit code 1
+srun: error: gpub030: task 5: Exited with exit code 1
+srun: error: gpub002: task 1: Exited with exit code 1
+srun: error: gpub016: task 3: Exited with exit code 1
+srun: error: gpub015: task 2: Exited with exit code 1
+# Accounting:
begin_time=1688441050 +# Accounting: end_time=1688483245 +# Accounting: time=42195 threads=1 +# Finished at Tue Jul 4 10:07:25 CDT 2023 with status 1 diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log new file mode 100644 index 0000000000000000000000000000000000000000..d2fb98012b3fefc71cd385147b7a24d9099386bb --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log @@ -0,0 +1,1294 @@ +# Running on gpub074.delta.ncsa.illinois.edu +# Started at Sun Jul 16 00:42:43 CDT 2023 +# SLURMD_NODENAME=gpub074 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2179250 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE=64 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2179250 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST=gpub074 +# SLURM_JOB_NUM_NODES=1 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=1 +# SLURM_NODEID=0 +# SLURM_NODELIST=gpub074 +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE=1 +# SLURM_TASK_PID=4188774 +# SLURM_TOPOLOGY_ADDR=ss00.ss12.gpub074 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed True +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed True +[gpub074:0/4] 2023-07-16 00:44:44,966 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub074:0/4] 2023-07-16 00:44:44,967 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes. 
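For context on the failure recorded in train.1.log above: once rank 29 aborted its NCCL communicators, every peer rank that next touched the process group (the DDP buffer broadcast in _sync_buffers, or the iterator_stop all_reduce) found the NCCLABORTEDCOMM key in the store and raised RuntimeError, and each node's parent process then surfaced the first nonzero worker exit as torch.multiprocessing.spawn.ProcessExitedException. Below is a minimal sketch of that last step only; it is not ESPnet code, and the worker body is a hypothetical stand-in for a training loop.

import sys

import torch.multiprocessing as mp


def worker(rank: int) -> None:
    # Hypothetical stand-in for a training worker: rank 1 exits nonzero,
    # the way a rank does after its NCCL communicator has been aborted.
    if rank == 1:
        sys.exit(1)


if __name__ == "__main__":
    try:
        # join=True (the default) waits on all workers and re-raises the
        # first failure it observes in the parent process.
        mp.spawn(worker, nprocs=4)
    except mp.ProcessExitedException as e:
        # error_index/exit_code correspond to the log's
        # "process N terminated with exit code 1" messages.
        print(f"process {e.error_index} terminated with exit code {e.exit_code}")

That parent-side exception is what each srun task propagates, which is why every node in the failed run above reports "Exited with exit code 1".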
+[gpub074:0/4] 2023-07-16 00:44:45,026 (s2t:483) INFO: Vocabulary size: 50002 +[gpub074:0/4] 2023-07-16 00:44:56,171 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub074:0/4] 2023-07-16 00:44:56,225 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
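The 24 printed DecoderLayer blocks are identical; each wires its submodules (self_attn, src_attn, feed_forward, norm1-3, dropout) into the usual residual pattern. A minimal sketch of that composition, assuming the pre-norm ("normalize_before") configuration that ESPnet transformer recipes typically use; the attention call signatures are simplified placeholders, not espnet2's exact API:

```python
import torch.nn as nn

class DecoderLayerSketch(nn.Module):
    """Pre-norm decoder block with the same submodules as the printout."""

    def __init__(self, size, self_attn, src_attn, feed_forward, dropout_rate=0.1):
        super().__init__()
        self.self_attn = self_attn        # MultiHeadedAttention, 1024 -> 1024
        self.src_attn = src_attn          # cross-attention over encoder output
        self.feed_forward = feed_forward  # position-wise FFN, 1024 -> 4096 -> 1024
        self.norm1 = nn.LayerNorm(size, eps=1e-12)
        self.norm2 = nn.LayerNorm(size, eps=1e-12)
        self.norm3 = nn.LayerNorm(size, eps=1e-12)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, tgt, tgt_mask, memory, memory_mask):
        # Masked self-attention, then cross-attention, then FFN, each as a
        # residual branch around a LayerNorm applied before the sublayer.
        x = tgt + self.dropout(self.self_attn(self.norm1(tgt), tgt_mask))
        x = x + self.dropout(self.src_attn(self.norm2(x), memory, memory_mask))
        return x + self.dropout(self.feed_forward(self.norm3(x)))
```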
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
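The summary's numbers are self-consistent: float32 parameters take 4 bytes each, so 888.51 M parameters occupy 888.51e6 x 4 B = 3.55 GB. A minimal sketch (not ESPnet's reporting code) that reproduces such a summary for any torch.nn.Module:

```python
import torch

def summarize(model: torch.nn.Module) -> None:
    """Print parameter counts and in-memory size like the log's Model summary."""
    params = list(model.parameters())
    total = sum(p.numel() for p in params)
    trainable = sum(p.numel() for p in params if p.requires_grad)
    # element_size() is 4 for torch.float32, hence 888.51 M params ~= 3.55 GB
    size_gb = sum(p.numel() * p.element_size() for p in params) / 1e9
    print(f"Total Number of model parameters: {total / 1e6:.2f} M")
    print(f"Number of trainable parameters: {trainable / 1e6:.2f} M "
          f"({100.0 * trainable / total:.1f}%)")
    print(f"Size: {size_gb:.2f} GB")
```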
+[gpub074:0/4] 2023-07-16 00:44:56,225 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub074:0/4] 2023-07-16 00:44:56,225 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
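The printed lr of 2.5e-08 against an initial_lr of 0.00025 is expected at the start of warmup, not a misconfiguration. A sketch of the schedule implied by the log, assuming the Noam-style rule that espnet2's WarmupLR documents (not copied from the source):

```python
def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
    """Noam-style warmup: linear ramp to base_lr, then inverse-sqrt decay."""
    return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

print(warmup_lr(1))      # 2.5e-08 -> matches the "lr" in the AdamW printout
print(warmup_lr(10000))  # 2.5e-04 -> the full base LR at the end of warmup
```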
+[gpub074:0/4] 2023-07-16 00:44:56,240 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub074:0/4] 2023-07-16 00:44:56,958 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub074:0/4] 2023-07-16 00:45:04,695 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub074:0/4] 2023-07-16 00:45:04,919 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub074:0/4] 2023-07-16 00:45:04,919 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub074:0/4] 2023-07-16 00:45:04,927 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub074:0/4] 2023-07-16 00:45:05,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub074:0/4] 2023-07-16 00:45:05,815 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub074:0/4] 2023-07-16 00:45:05,815 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub074:0/4] 2023-07-16 00:45:05,815 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpub074:0/4] 2023-07-16 00:45:33,488 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+[gpub074:0/4] 2023-07-16 00:45:33,492 (trainer:218) WARNING: The training has already reached at max_epoch: 56
+gpub074:4188818:4188818 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:4188818:4188818 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:4188818:4188818 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+gpub074:4188819:4188819 [1] NCCL INFO cudaDriverVersion 12010
+gpub074:4188819:4188819 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:4188819:4188819 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:4188819:4188945 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:4188819:4188945 [1] NCCL INFO Using network IB
+gpub074:4188819:4188945 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub074:4188819:4188945 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 [2] 2/-1/-1->1->0 [3] 2/-1/-1->1->0
+gpub074:4188819:4188945 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 02/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 03/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Connected all rings
+gpub074:4188819:4188945 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 02/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 03/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Connected all trees
+gpub074:4188819:4188945 [1] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 512 | 512
+gpub074:4188819:4188945 [1] NCCL INFO 4 coll channels, 4 p2p channels, 2 p2p channels per peer
+gpub074:4188819:4188945 [1] NCCL INFO comm 0xa71a630 rank 1 nranks 4 cudaDev 1 busId 46000 - Init COMPLETE
+gpub074:4188821:4188821 [3] NCCL INFO cudaDriverVersion 12010
+gpub074:4188821:4188821 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:4188821:4188821 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:4188821:4188946 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:4188821:4188946 [3] NCCL INFO Using network IB
+gpub074:4188821:4188946 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub074:4188821:4188946 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 [2] -1/-1/-1->3->2 [3] -1/-1/-1->3->2
+gpub074:4188821:4188946 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 0[7000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 0[7000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 02/0 : 3[c7000] -> 0[7000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 03/0 : 3[c7000] -> 0[7000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Connected all rings
+gpub074:4188821:4188946 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 02/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 03/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Connected all trees
+gpub074:4188821:4188946 [3] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 512 | 512
+gpub074:4188821:4188946 [3] NCCL INFO 4 coll channels, 4 p2p channels, 2 p2p channels per peer
+gpub074:4188821:4188946 [3] NCCL INFO comm 0xa2e9cb0 rank 3 nranks 4 cudaDev 3 busId c7000 - Init COMPLETE
+gpub074:4188820:4188820 [2] NCCL INFO cudaDriverVersion 12010
+gpub074:4188820:4188820 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:4188820:4188820 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:4188820:4188947 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:4188820:4188947 [2] NCCL INFO Using network IB
+gpub074:4188820:4188947 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub074:4188820:4188947 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 [2] 3/-1/-1->2->1 [3] 3/-1/-1->2->1
+gpub074:4188820:4188947 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 02/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 03/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Connected all rings
+gpub074:4188820:4188947 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 02/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 03/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Connected all trees
+gpub074:4188820:4188947 [2] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 512 | 512
+gpub074:4188820:4188947 [2] NCCL INFO 4 coll channels, 4 p2p channels, 2 p2p channels per peer
+gpub074:4188820:4188947 [2] NCCL INFO comm 0x4f8c6c10 rank 2 nranks 4 cudaDev 2 busId 85000 - Init COMPLETE
+gpub074:4188819:4188953 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub074:4188819:4188819 [1] NCCL INFO comm 0xa71a630 rank 1 nranks 4 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub074:4188821:4188955 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub074:4188821:4188821 [3] NCCL INFO comm 0xa2e9cb0 rank 3 nranks 4 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub074:4188820:4188952 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub074:4188820:4188820 [2] NCCL INFO comm 0x4f8c6c10 rank 2 nranks 4 cudaDev 2 busId 85000 - Abort COMPLETE
+[gpub074:0/4] 2023-07-16 00:45:37,470 (trainer:458) INFO: The training was finished at 55 epochs
+[gpub074:0/4] 2023-07-16 00:45:37,508 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.pth
+[gpub074:0/4] 2023-07-16 00:46:24,407 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.pth
+gpub074:4188818:4188944 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:4188818:4188944 [0] NCCL INFO Using network IB
+gpub074:4188818:4188944 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub074:4188818:4188944 [0] NCCL INFO Channel 00/04 : 0 1 2 3
+gpub074:4188818:4188944 [0] NCCL INFO Channel 01/04 : 0 1 2 3
+gpub074:4188818:4188944 [0] NCCL INFO Channel 02/04 : 0 1 2 3
+gpub074:4188818:4188944 [0] NCCL INFO Channel 03/04 : 0 1 2 3
+gpub074:4188818:4188944 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1 [2] 1/-1/-1->0->-1 [3] 1/-1/-1->0->-1
+gpub074:4188818:4188944 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub074:4188818:4188944 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub074:4188818:4188944 [0] NCCL INFO Channel 02/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub074:4188818:4188944 [0] NCCL INFO Channel 03/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub074:4188818:4188944 [0] NCCL INFO Connected all rings
+gpub074:4188818:4188944 [0] NCCL INFO Connected all trees
+gpub074:4188818:4188944 [0] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 512 | 512
+gpub074:4188818:4188944 [0] NCCL INFO 4 coll channels, 4 p2p channels, 2 p2p channels per peer
+gpub074:4188818:4188944 [0] NCCL INFO comm 0x4f7f3ad0 rank 0 nranks 4 cudaDev 0 busId 7000 - Init COMPLETE
+gpub074:4188818:4188954 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub074:4188818:4188818 [0] NCCL INFO comm 0x4f7f3ad0 rank 0 nranks 4 cudaDev 0 busId 7000 - Abort COMPLETE
+# Accounting: begin_time=1689486163
+# Accounting: end_time=1689486432
+# Accounting: time=269 threads=1
+# Finished at Sun Jul 16 00:47:12 CDT 2023 with status 0
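Because the resumed checkpoint had already reached max_epoch, this job ran no training steps; its only substantive work in the 269 s of wall time was writing the two averaged checkpoints (valid.acc.ave_5best.pth and valid.total_count.ave_5best.pth) before exiting with status 0. N-best averaging simply takes the element-wise mean of the weights of the five checkpoints that scored best on the given validation criterion; a minimal sketch with hypothetical file names, not espnet2's average_nbest_models implementation:

```python
import torch

def average_checkpoints(paths: list[str]) -> dict:
    """Element-wise mean of the state dicts stored at the given paths."""
    avg = None
    for path in paths:
        state = torch.load(path, map_location="cpu")
        if avg is None:
            avg = {k: v.clone().float() for k, v in state.items()}
        else:
            for k in avg:
                avg[k] += state[k].float()
    for k in avg:
        avg[k] /= len(paths)
    return avg

# e.g., averaging the five epochs with the best valid.acc (paths hypothetical):
# avg = average_checkpoints([f"exp/.../{e}epoch.pth" for e in best_epochs])
# torch.save(avg, "exp/.../valid.acc.ave_5best.pth")
```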